From 22ca63982be6468ebce6335a4d6aa9c16f70c3f1 Mon Sep 17 00:00:00 2001 From: Jason Date: Wed, 10 Aug 2022 10:50:22 +0800 Subject: [PATCH] Refine code structure (#89) * refine code structure * refine code structure --- CMakeLists.txt | 35 +- csrcs/fastdeploy/CMakeLists.txt | 0 csrcs/fastdeploy/backends/backend.h | 49 - .../backends/common/multiclass_nms.cc | 224 - .../backends/common/multiclass_nms.h | 45 - .../backends/ort/ops/multiclass_nms.cc | 261 -- .../backends/ort/ops/multiclass_nms.h | 81 - csrcs/fastdeploy/backends/ort/ort_backend.cc | 279 -- csrcs/fastdeploy/backends/ort/ort_backend.h | 93 - csrcs/fastdeploy/backends/ort/utils.cc | 67 - csrcs/fastdeploy/backends/ort/utils.h | 39 - .../backends/paddle/paddle_backend.cc | 105 - .../backends/paddle/paddle_backend.h | 78 - csrcs/fastdeploy/backends/paddle/util.cc | 76 - .../backends/tensorrt/common/BatchStream.h | 342 -- .../backends/tensorrt/common/CPPLINT.cfg | 1 - .../tensorrt/common/EntropyCalibrator.h | 118 - .../backends/tensorrt/common/ErrorRecorder.h | 115 - .../backends/tensorrt/common/README.md | 1 - .../backends/tensorrt/common/argsParser.h | 169 - .../backends/tensorrt/common/buffers.h | 426 -- .../backends/tensorrt/common/common.h | 844 ---- .../backends/tensorrt/common/getOptions.cpp | 223 - .../backends/tensorrt/common/getOptions.h | 128 - .../backends/tensorrt/common/half.h | 3787 ----------------- .../backends/tensorrt/common/logger.cpp | 38 - .../backends/tensorrt/common/logger.h | 35 - .../backends/tensorrt/common/logging.h | 573 --- .../tensorrt/common/parserOnnxConfig.h | 126 - .../backends/tensorrt/common/safeCommon.h | 65 - .../backends/tensorrt/common/sampleConfig.h | 251 -- .../backends/tensorrt/common/sampleDevice.h | 397 -- .../tensorrt/common/sampleEngines.cpp | 1710 -------- .../backends/tensorrt/common/sampleEngines.h | 195 - .../tensorrt/common/sampleInference.cpp | 943 ---- .../tensorrt/common/sampleInference.h | 88 - .../tensorrt/common/sampleOptions.cpp | 1634 ------- .../backends/tensorrt/common/sampleOptions.h | 311 -- .../tensorrt/common/sampleReporting.cpp | 480 --- .../tensorrt/common/sampleReporting.h | 211 - .../backends/tensorrt/common/sampleUtils.h | 494 --- .../backends/tensorrt/common/windows/getopt.c | 568 --- .../backends/tensorrt/common/windows/getopt.h | 124 - .../backends/tensorrt/trt_backend.cc | 528 --- .../backends/tensorrt/trt_backend.h | 113 - csrcs/fastdeploy/core/config.h.in | 54 - csrcs/fastdeploy/core/fd_tensor.cc | 134 - csrcs/fastdeploy/core/fd_tensor.h | 87 - csrcs/fastdeploy/core/fd_type.cc | 123 - csrcs/fastdeploy/core/fd_type.h | 63 - csrcs/fastdeploy/fastdeploy_model.cc | 145 - csrcs/fastdeploy/fastdeploy_model.h | 67 - csrcs/fastdeploy/fastdeploy_runtime.cc | 365 -- csrcs/fastdeploy/fastdeploy_runtime.h | 159 - csrcs/fastdeploy/function/eigen.cc | 32 - csrcs/fastdeploy/function/eigen.h | 109 - csrcs/fastdeploy/function/reduce.cc | 246 -- csrcs/fastdeploy/function/reduce.h | 100 - csrcs/fastdeploy/function/reduce_functor.h | 76 - csrcs/fastdeploy/pybind/fastdeploy_model.cc | 35 - csrcs/fastdeploy/pybind/fastdeploy_runtime.cc | 134 - csrcs/fastdeploy/pybind/main.cc.in | 127 - csrcs/fastdeploy/pybind/main.h | 90 - csrcs/fastdeploy/text.h | 19 - csrcs/fastdeploy/text/common/option.h | 26 - csrcs/fastdeploy/text/common/result.cc | 18 - csrcs/fastdeploy/text/common/result.h | 23 - .../text/postprocessor/postprocessor.cc | 31 - .../text/postprocessor/postprocessor.h | 34 - .../text/preprocessor/preprocessor.cc | 32 - .../text/preprocessor/preprocessor.h | 34 - csrcs/fastdeploy/text/text_model.cc | 79 - csrcs/fastdeploy/text/text_model.h | 51 - csrcs/fastdeploy/text/text_pybind.cc | 13 - csrcs/fastdeploy/utils/perf.h | 49 - csrcs/fastdeploy/utils/unique_ptr.h | 58 - csrcs/fastdeploy/utils/utils.cc | 49 - csrcs/fastdeploy/utils/utils.h | 150 - csrcs/fastdeploy/vision.h | 41 - csrcs/fastdeploy/vision/AddModel.md | 3 - .../vision/common/processors/base.cc | 61 - .../vision/common/processors/base.h | 48 - .../vision/common/processors/cast.cc | 64 - .../vision/common/processors/cast.h | 37 - .../vision/common/processors/center_crop.cc | 63 - .../vision/common/processors/center_crop.h | 40 - .../common/processors/color_space_convert.cc | 58 - .../common/processors/color_space_convert.h | 44 - .../vision/common/processors/convert.cc | 62 - .../vision/common/processors/convert.h | 42 - .../vision/common/processors/hwc2chw.cc | 75 - .../vision/common/processors/hwc2chw.h | 33 - .../vision/common/processors/mat.cc | 117 - .../fastdeploy/vision/common/processors/mat.h | 80 - .../vision/common/processors/normalize.cc | 88 - .../vision/common/processors/normalize.h | 53 - .../vision/common/processors/pad.cc | 100 - .../fastdeploy/vision/common/processors/pad.h | 50 - .../vision/common/processors/pad_to_size.cc | 141 - .../vision/common/processors/pad_to_size.h | 46 - .../vision/common/processors/resize.cc | 90 - .../vision/common/processors/resize.h | 63 - .../common/processors/resize_by_short.cc | 76 - .../common/processors/resize_by_short.h | 49 - .../vision/common/processors/stride_pad.cc | 124 - .../vision/common/processors/stride_pad.h | 44 - .../vision/common/processors/transform.h | 27 - csrcs/fastdeploy/vision/common/result.cc | 306 -- csrcs/fastdeploy/vision/common/result.h | 148 - .../vision/detection/contrib/nanodet_plus.cc | 355 -- .../vision/detection/contrib/nanodet_plus.h | 101 - .../detection/contrib/nanodet_plus_pybind.cc | 39 - .../vision/detection/contrib/scaledyolov4.cc | 255 -- .../vision/detection/contrib/scaledyolov4.h | 103 - .../detection/contrib/scaledyolov4_pybind.cc | 41 - .../vision/detection/contrib/yolor.cc | 253 -- .../vision/detection/contrib/yolor.h | 102 - .../vision/detection/contrib/yolor_pybind.cc | 37 - .../vision/detection/contrib/yolov5.cc | 295 -- .../vision/detection/contrib/yolov5.h | 108 - .../vision/detection/contrib/yolov5_pybind.cc | 38 - .../vision/detection/contrib/yolov5lite.cc | 399 -- .../vision/detection/contrib/yolov5lite.h | 138 - .../detection/contrib/yolov5lite_pybind.cc | 43 - .../vision/detection/contrib/yolov6.cc | 267 -- .../vision/detection/contrib/yolov6.h | 108 - .../vision/detection/contrib/yolov6_pybind.cc | 37 - .../vision/detection/contrib/yolov7.cc | 253 -- .../vision/detection/contrib/yolov7.h | 100 - .../vision/detection/contrib/yolov7_pybind.cc | 37 - .../vision/detection/contrib/yolox.cc | 339 -- .../vision/detection/contrib/yolox.h | 107 - .../vision/detection/contrib/yolox_pybind.cc | 37 - .../vision/detection/detection_pybind.cc | 42 - .../fastdeploy/vision/detection/ppdet/model.h | 21 - .../vision/detection/ppdet/picodet.cc | 66 - .../vision/detection/ppdet/picodet.h | 36 - .../vision/detection/ppdet/ppdet_pybind.cc | 95 - .../vision/detection/ppdet/ppyolo.cc | 78 - .../vision/detection/ppdet/ppyolo.h | 51 - .../vision/detection/ppdet/ppyoloe.cc | 258 -- .../vision/detection/ppdet/ppyoloe.h | 68 - .../fastdeploy/vision/detection/ppdet/rcnn.cc | 84 - .../fastdeploy/vision/detection/ppdet/rcnn.h | 39 - .../vision/detection/ppdet/yolov3.cc | 64 - .../vision/detection/ppdet/yolov3.h | 35 - .../vision/detection/ppdet/yolox.cc | 72 - .../fastdeploy/vision/detection/ppdet/yolox.h | 35 - .../vision/facedet/contrib/retinaface.cc | 310 -- .../vision/facedet/contrib/retinaface.h | 92 - .../facedet/contrib/retinaface_pybind.cc | 38 - .../vision/facedet/contrib/scrfd.cc | 369 -- .../fastdeploy/vision/facedet/contrib/scrfd.h | 122 - .../vision/facedet/contrib/scrfd_pybind.cc | 45 - .../vision/facedet/contrib/ultraface.cc | 221 - .../vision/facedet/contrib/ultraface.h | 84 - .../facedet/contrib/ultraface_pybind.cc | 31 - .../vision/facedet/contrib/yolov5face.cc | 294 -- .../vision/facedet/contrib/yolov5face.h | 97 - .../facedet/contrib/yolov5face_pybind.cc | 41 - .../vision/facedet/facedet_pybind.cc | 31 - .../vision/faceid/contrib/arcface.cc | 83 - .../vision/faceid/contrib/arcface.h | 65 - .../vision/faceid/contrib/arcface_pybind.cc | 37 - .../vision/faceid/contrib/cosface.cc | 83 - .../vision/faceid/contrib/cosface.h | 66 - .../vision/faceid/contrib/cosface_pybind.cc | 37 - .../vision/faceid/contrib/insightface_rec.cc | 153 - .../vision/faceid/contrib/insightface_rec.h | 72 - .../faceid/contrib/insightface_rec_pybind.cc | 42 - .../vision/faceid/contrib/partial_fc.cc | 84 - .../vision/faceid/contrib/partial_fc.h | 64 - .../faceid/contrib/partial_fc_pybind.cc | 37 - csrcs/fastdeploy/vision/faceid/contrib/vpl.cc | 82 - csrcs/fastdeploy/vision/faceid/contrib/vpl.h | 65 - .../vision/faceid/contrib/vpl_pybind.cc | 37 - .../fastdeploy/vision/faceid/faceid_pybind.cc | 33 - .../vision/matting/contrib/modnet.cc | 175 - .../vision/matting/contrib/modnet.h | 70 - .../vision/matting/contrib/modnet_pybind.cc | 35 - .../vision/matting/matting_pybind.cc | 26 - csrcs/fastdeploy/vision/ppcls/model.cc | 153 - csrcs/fastdeploy/vision/ppcls/model.h | 51 - csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc | 30 - csrcs/fastdeploy/vision/ppseg/model.cc | 232 - csrcs/fastdeploy/vision/ppseg/model.h | 43 - csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc | 35 - .../fastdeploy/vision/utils/FDTensor2CVMat.cc | 59 - .../vision/utils/cosine_similarity.cc | 49 - csrcs/fastdeploy/vision/utils/l2_normalize.cc | 41 - csrcs/fastdeploy/vision/utils/nms.cc | 127 - csrcs/fastdeploy/vision/utils/sort_det_res.cc | 81 - .../vision/utils/sort_face_det_res.cc | 69 - csrcs/fastdeploy/vision/utils/utils.h | 140 - csrcs/fastdeploy/vision/vision_pybind.cc | 93 - .../fastdeploy/vision/visualize/detection.cc | 64 - .../vision/visualize/face_detection.cc | 84 - .../vision/visualize/matting_alpha.cc | 119 - .../vision/visualize/segmentation.cc | 46 - .../fastdeploy/vision/visualize/visualize.cc | 47 - csrcs/fastdeploy/vision/visualize/visualize.h | 41 - .../vision/visualize/visualize_pybind.cc | 62 - examples/.gitignore | 8 - examples/CMakeLists.txt | 50 - examples/resources/.gitignore | 15 - examples/resources/images/.gitignore | 3 - examples/resources/models/.gitignore | 5 - examples/resources/outputs/.gitignore | 3 - examples/text/ernie_tokencls.cc | 225 - examples/vision/biubug6_retinaface.cc | 55 - examples/vision/deepcam_yolov5face.cc | 53 - examples/vision/deepinsight_arcface.cc | 64 - examples/vision/deepinsight_cosface.cc | 64 - .../vision/deepinsight_insightface_rec.cc | 64 - examples/vision/deepinsight_partial_fc.cc | 64 - examples/vision/deepinsight_scrfd.cc | 51 - examples/vision/deepinsight_vpl.cc | 64 - .../vision/detection/README.md | 0 .../vision/detection/yolov7/README.md | 0 .../detection/yolov7/cpp/CMakeLists.txt | 0 .../vision/detection/yolov7/cpp/README.md | 0 .../vision/detection/yolov7/cpp/infer.cc | 0 .../vision/detection/yolov7/python/README.md | 0 .../vision/detection/yolov7/python/infer.py | 0 examples/vision/linzaer_ultraface.cc | 53 - examples/vision/megvii_yolox.cc | 52 - examples/vision/meituan_yolov6.cc | 52 - examples/vision/ppdet_ppyoloe.cc | 51 - examples/vision/ppogg_yolov5lite.cc | 52 - examples/vision/ppseg_unet.cc | 59 - examples/vision/rangilyu_nanodet_plus.cc | 53 - examples/vision/ultralytics_yolov5.cc | 52 - examples/vision/wongkinyiu_scaledyolov4.cc | 52 - examples/vision/wongkinyiu_yolor.cc | 52 - examples/vision/wongkinyiu_yolov7.cc | 52 - examples/vision/zhkkke_modnet.cc | 58 - model_zoo/.gitignore | 12 - model_zoo/text/ernie-3.0/README.md | 238 -- model_zoo/text/ernie-3.0/ernie_predictor.py | 242 -- model_zoo/text/ernie-3.0/infer_cpu.py | 78 - model_zoo/text/ernie-3.0/infer_gpu.py | 84 - model_zoo/text/ernie-3.0/requirements_cpu.txt | 3 - model_zoo/text/ernie-3.0/requirements_gpu.txt | 4 - model_zoo/vision/arcface/README.md | 80 - model_zoo/vision/arcface/api.md | 113 - model_zoo/vision/arcface/arcface.py | 46 - model_zoo/vision/arcface/cpp/CMakeLists.txt | 17 - model_zoo/vision/arcface/cpp/README.md | 61 - model_zoo/vision/arcface/cpp/arcface.cc | 64 - model_zoo/vision/modnet/README.md | 67 - model_zoo/vision/modnet/api.md | 72 - model_zoo/vision/modnet/cpp/CMakeLists.txt | 17 - model_zoo/vision/modnet/cpp/README.md | 49 - model_zoo/vision/modnet/cpp/modnet.cc | 57 - model_zoo/vision/modnet/modnet.py | 22 - model_zoo/vision/nanodet_plus/README.md | 46 - model_zoo/vision/nanodet_plus/api.md | 71 - .../vision/nanodet_plus/cpp/CMakeLists.txt | 17 - model_zoo/vision/nanodet_plus/cpp/README.md | 30 - .../vision/nanodet_plus/cpp/nanodet_plus.cc | 40 - model_zoo/vision/nanodet_plus/nanodet_plus.py | 23 - model_zoo/vision/ppseg/ppseg_unet.py | 26 - model_zoo/vision/retinaface/README.md | 76 - model_zoo/vision/retinaface/api.md | 71 - .../vision/retinaface/cpp/CMakeLists.txt | 17 - model_zoo/vision/retinaface/cpp/README.md | 61 - model_zoo/vision/retinaface/cpp/retinaface.cc | 49 - model_zoo/vision/retinaface/retinaface.py | 24 - model_zoo/vision/scaledyolov4/README.md | 66 - model_zoo/vision/scaledyolov4/api.md | 71 - .../vision/scaledyolov4/cpp/CMakeLists.txt | 17 - model_zoo/vision/scaledyolov4/cpp/README.md | 53 - .../vision/scaledyolov4/cpp/scaledyolov4.cc | 40 - .../vision/scaledyolov4/scaled_yolov4.py | 21 - model_zoo/vision/scrfd/README.md | 92 - model_zoo/vision/scrfd/api.md | 71 - model_zoo/vision/scrfd/cpp/CMakeLists.txt | 17 - model_zoo/vision/scrfd/cpp/README.md | 76 - model_zoo/vision/scrfd/cpp/scrfd.cc | 44 - model_zoo/vision/scrfd/scrfd.py | 25 - model_zoo/vision/ultraface/README.md | 49 - model_zoo/vision/ultraface/api.md | 71 - model_zoo/vision/ultraface/cpp/CMakeLists.txt | 17 - model_zoo/vision/ultraface/cpp/README.md | 36 - model_zoo/vision/ultraface/cpp/ultraface.cc | 48 - model_zoo/vision/ultraface/ultraface.py | 23 - model_zoo/vision/yolor/README.md | 66 - model_zoo/vision/yolor/api.md | 71 - model_zoo/vision/yolor/cpp/CMakeLists.txt | 17 - model_zoo/vision/yolor/cpp/README.md | 53 - model_zoo/vision/yolor/cpp/yolor.cc | 40 - model_zoo/vision/yolor/yolor.py | 21 - model_zoo/vision/yolov5/README.md | 47 - model_zoo/vision/yolov5/api.md | 71 - model_zoo/vision/yolov5/cpp/CMakeLists.txt | 18 - model_zoo/vision/yolov5/cpp/README.md | 31 - model_zoo/vision/yolov5/cpp/yolov5.cc | 40 - model_zoo/vision/yolov5/yolov5.py | 23 - model_zoo/vision/yolov5face/README.md | 78 - model_zoo/vision/yolov5face/api.md | 71 - .../vision/yolov5face/cpp/CMakeLists.txt | 17 - model_zoo/vision/yolov5face/cpp/README.md | 60 - model_zoo/vision/yolov5face/cpp/yolov5face.cc | 40 - model_zoo/vision/yolov5face/yolov5face.py | 17 - model_zoo/vision/yolov5lite/README.md | 130 - model_zoo/vision/yolov5lite/api.md | 71 - .../vision/yolov5lite/cpp/CMakeLists.txt | 17 - model_zoo/vision/yolov5lite/cpp/README.md | 117 - model_zoo/vision/yolov5lite/cpp/yolov5lite.cc | 42 - model_zoo/vision/yolov5lite/yolov5lite.py | 24 - model_zoo/vision/yolov6/README.md | 47 - model_zoo/vision/yolov6/api.md | 71 - model_zoo/vision/yolov6/cpp/CMakeLists.txt | 17 - model_zoo/vision/yolov6/cpp/README.md | 31 - model_zoo/vision/yolov6/cpp/yolov6.cc | 40 - model_zoo/vision/yolov6/yolov6.py | 23 - model_zoo/vision/yolov7/README.md | 70 - model_zoo/vision/yolov7/api.md | 71 - model_zoo/vision/yolov7/cpp/CMakeLists.txt | 17 - model_zoo/vision/yolov7/cpp/README.md | 53 - model_zoo/vision/yolov7/cpp/yolov7.cc | 40 - model_zoo/vision/yolov7/yolov7.py | 21 - model_zoo/vision/yolox/README.md | 47 - model_zoo/vision/yolox/api.md | 71 - model_zoo/vision/yolox/cpp/CMakeLists.txt | 17 - model_zoo/vision/yolox/cpp/README.md | 31 - model_zoo/vision/yolox/cpp/yolox.cc | 40 - model_zoo/vision/yolox/yolox.py | 22 - sdk_mannager/fastdeploy/__init__.py | 230 - sdk_mannager/fastdeploy/__main__.py | 18 - sdk_mannager/fastdeploy/download.py | 186 - sdk_mannager/requirements.txt | 2 - sdk_mannager/setup.py | 36 - 333 files changed, 1 insertion(+), 37500 deletions(-) delete mode 100644 csrcs/fastdeploy/CMakeLists.txt delete mode 100644 csrcs/fastdeploy/backends/backend.h delete mode 100644 csrcs/fastdeploy/backends/common/multiclass_nms.cc delete mode 100644 csrcs/fastdeploy/backends/common/multiclass_nms.h delete mode 100644 csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc delete mode 100644 csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h delete mode 100644 csrcs/fastdeploy/backends/ort/ort_backend.cc delete mode 100644 csrcs/fastdeploy/backends/ort/ort_backend.h delete mode 100644 csrcs/fastdeploy/backends/ort/utils.cc delete mode 100644 csrcs/fastdeploy/backends/ort/utils.h delete mode 100644 csrcs/fastdeploy/backends/paddle/paddle_backend.cc delete mode 100644 csrcs/fastdeploy/backends/paddle/paddle_backend.h delete mode 100644 csrcs/fastdeploy/backends/paddle/util.cc delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/README.md delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/argsParser.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/buffers.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/common.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/getOptions.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/half.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/logger.cpp delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/logger.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/logging.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c delete mode 100644 csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h delete mode 100644 csrcs/fastdeploy/backends/tensorrt/trt_backend.cc delete mode 100644 csrcs/fastdeploy/backends/tensorrt/trt_backend.h delete mode 100644 csrcs/fastdeploy/core/config.h.in delete mode 100644 csrcs/fastdeploy/core/fd_tensor.cc delete mode 100644 csrcs/fastdeploy/core/fd_tensor.h delete mode 100644 csrcs/fastdeploy/core/fd_type.cc delete mode 100644 csrcs/fastdeploy/core/fd_type.h delete mode 100644 csrcs/fastdeploy/fastdeploy_model.cc delete mode 100644 csrcs/fastdeploy/fastdeploy_model.h delete mode 100644 csrcs/fastdeploy/fastdeploy_runtime.cc delete mode 100644 csrcs/fastdeploy/fastdeploy_runtime.h delete mode 100644 csrcs/fastdeploy/function/eigen.cc delete mode 100644 csrcs/fastdeploy/function/eigen.h delete mode 100644 csrcs/fastdeploy/function/reduce.cc delete mode 100644 csrcs/fastdeploy/function/reduce.h delete mode 100644 csrcs/fastdeploy/function/reduce_functor.h delete mode 100644 csrcs/fastdeploy/pybind/fastdeploy_model.cc delete mode 100644 csrcs/fastdeploy/pybind/fastdeploy_runtime.cc delete mode 100644 csrcs/fastdeploy/pybind/main.cc.in delete mode 100644 csrcs/fastdeploy/pybind/main.h delete mode 100644 csrcs/fastdeploy/text.h delete mode 100644 csrcs/fastdeploy/text/common/option.h delete mode 100644 csrcs/fastdeploy/text/common/result.cc delete mode 100644 csrcs/fastdeploy/text/common/result.h delete mode 100644 csrcs/fastdeploy/text/postprocessor/postprocessor.cc delete mode 100644 csrcs/fastdeploy/text/postprocessor/postprocessor.h delete mode 100644 csrcs/fastdeploy/text/preprocessor/preprocessor.cc delete mode 100644 csrcs/fastdeploy/text/preprocessor/preprocessor.h delete mode 100644 csrcs/fastdeploy/text/text_model.cc delete mode 100644 csrcs/fastdeploy/text/text_model.h delete mode 100644 csrcs/fastdeploy/text/text_pybind.cc delete mode 100644 csrcs/fastdeploy/utils/perf.h delete mode 100644 csrcs/fastdeploy/utils/unique_ptr.h delete mode 100644 csrcs/fastdeploy/utils/utils.cc delete mode 100644 csrcs/fastdeploy/utils/utils.h delete mode 100644 csrcs/fastdeploy/vision.h delete mode 100644 csrcs/fastdeploy/vision/AddModel.md delete mode 100644 csrcs/fastdeploy/vision/common/processors/base.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/base.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/cast.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/cast.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/center_crop.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/center_crop.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/color_space_convert.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/color_space_convert.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/convert.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/convert.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/hwc2chw.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/hwc2chw.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/mat.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/mat.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/normalize.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/normalize.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad_to_size.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/pad_to_size.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize_by_short.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/resize_by_short.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/stride_pad.cc delete mode 100644 csrcs/fastdeploy/vision/common/processors/stride_pad.h delete mode 100644 csrcs/fastdeploy/vision/common/processors/transform.h delete mode 100644 csrcs/fastdeploy/vision/common/result.cc delete mode 100644 csrcs/fastdeploy/vision/common/result.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolor.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolor.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov6.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov6.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov7.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov7.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolox.cc delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolox.h delete mode 100644 csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/detection_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/model.h delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/picodet.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/picodet.h delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/rcnn.h delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolov3.h delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolox.cc delete mode 100644 csrcs/fastdeploy/vision/detection/ppdet/yolox.h delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/retinaface.h delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/scrfd.h delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/ultraface.h delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h delete mode 100644 csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/facedet/facedet_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/arcface.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/arcface.h delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/cosface.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/cosface.h delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/vpl.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/vpl.h delete mode 100644 csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/faceid/faceid_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/matting/contrib/modnet.cc delete mode 100644 csrcs/fastdeploy/vision/matting/contrib/modnet.h delete mode 100644 csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/matting/matting_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/ppcls/model.cc delete mode 100644 csrcs/fastdeploy/vision/ppcls/model.h delete mode 100644 csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/ppseg/model.cc delete mode 100644 csrcs/fastdeploy/vision/ppseg/model.h delete mode 100644 csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc delete mode 100644 csrcs/fastdeploy/vision/utils/cosine_similarity.cc delete mode 100644 csrcs/fastdeploy/vision/utils/l2_normalize.cc delete mode 100644 csrcs/fastdeploy/vision/utils/nms.cc delete mode 100644 csrcs/fastdeploy/vision/utils/sort_det_res.cc delete mode 100644 csrcs/fastdeploy/vision/utils/sort_face_det_res.cc delete mode 100644 csrcs/fastdeploy/vision/utils/utils.h delete mode 100644 csrcs/fastdeploy/vision/vision_pybind.cc delete mode 100644 csrcs/fastdeploy/vision/visualize/detection.cc delete mode 100644 csrcs/fastdeploy/vision/visualize/face_detection.cc delete mode 100644 csrcs/fastdeploy/vision/visualize/matting_alpha.cc delete mode 100644 csrcs/fastdeploy/vision/visualize/segmentation.cc delete mode 100644 csrcs/fastdeploy/vision/visualize/visualize.cc delete mode 100644 csrcs/fastdeploy/vision/visualize/visualize.h delete mode 100644 csrcs/fastdeploy/vision/visualize/visualize_pybind.cc delete mode 100644 examples/.gitignore delete mode 100644 examples/CMakeLists.txt delete mode 100644 examples/resources/.gitignore delete mode 100644 examples/resources/images/.gitignore delete mode 100644 examples/resources/models/.gitignore delete mode 100644 examples/resources/outputs/.gitignore delete mode 100644 examples/text/ernie_tokencls.cc delete mode 100644 examples/vision/biubug6_retinaface.cc delete mode 100644 examples/vision/deepcam_yolov5face.cc delete mode 100644 examples/vision/deepinsight_arcface.cc delete mode 100644 examples/vision/deepinsight_cosface.cc delete mode 100644 examples/vision/deepinsight_insightface_rec.cc delete mode 100644 examples/vision/deepinsight_partial_fc.cc delete mode 100644 examples/vision/deepinsight_scrfd.cc delete mode 100644 examples/vision/deepinsight_vpl.cc rename {new_examples => examples}/vision/detection/README.md (100%) rename {new_examples => examples}/vision/detection/yolov7/README.md (100%) rename {new_examples => examples}/vision/detection/yolov7/cpp/CMakeLists.txt (100%) rename {new_examples => examples}/vision/detection/yolov7/cpp/README.md (100%) rename {new_examples => examples}/vision/detection/yolov7/cpp/infer.cc (100%) rename {new_examples => examples}/vision/detection/yolov7/python/README.md (100%) rename {new_examples => examples}/vision/detection/yolov7/python/infer.py (100%) delete mode 100644 examples/vision/linzaer_ultraface.cc delete mode 100644 examples/vision/megvii_yolox.cc delete mode 100644 examples/vision/meituan_yolov6.cc delete mode 100644 examples/vision/ppdet_ppyoloe.cc delete mode 100644 examples/vision/ppogg_yolov5lite.cc delete mode 100644 examples/vision/ppseg_unet.cc delete mode 100644 examples/vision/rangilyu_nanodet_plus.cc delete mode 100644 examples/vision/ultralytics_yolov5.cc delete mode 100644 examples/vision/wongkinyiu_scaledyolov4.cc delete mode 100644 examples/vision/wongkinyiu_yolor.cc delete mode 100644 examples/vision/wongkinyiu_yolov7.cc delete mode 100644 examples/vision/zhkkke_modnet.cc delete mode 100644 model_zoo/.gitignore delete mode 100755 model_zoo/text/ernie-3.0/README.md delete mode 100755 model_zoo/text/ernie-3.0/ernie_predictor.py delete mode 100755 model_zoo/text/ernie-3.0/infer_cpu.py delete mode 100755 model_zoo/text/ernie-3.0/infer_gpu.py delete mode 100755 model_zoo/text/ernie-3.0/requirements_cpu.txt delete mode 100755 model_zoo/text/ernie-3.0/requirements_gpu.txt delete mode 100644 model_zoo/vision/arcface/README.md delete mode 100644 model_zoo/vision/arcface/api.md delete mode 100644 model_zoo/vision/arcface/arcface.py delete mode 100644 model_zoo/vision/arcface/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/arcface/cpp/README.md delete mode 100644 model_zoo/vision/arcface/cpp/arcface.cc delete mode 100644 model_zoo/vision/modnet/README.md delete mode 100644 model_zoo/vision/modnet/api.md delete mode 100644 model_zoo/vision/modnet/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/modnet/cpp/README.md delete mode 100644 model_zoo/vision/modnet/cpp/modnet.cc delete mode 100644 model_zoo/vision/modnet/modnet.py delete mode 100644 model_zoo/vision/nanodet_plus/README.md delete mode 100644 model_zoo/vision/nanodet_plus/api.md delete mode 100644 model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/nanodet_plus/cpp/README.md delete mode 100644 model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc delete mode 100644 model_zoo/vision/nanodet_plus/nanodet_plus.py delete mode 100644 model_zoo/vision/ppseg/ppseg_unet.py delete mode 100644 model_zoo/vision/retinaface/README.md delete mode 100644 model_zoo/vision/retinaface/api.md delete mode 100644 model_zoo/vision/retinaface/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/retinaface/cpp/README.md delete mode 100644 model_zoo/vision/retinaface/cpp/retinaface.cc delete mode 100644 model_zoo/vision/retinaface/retinaface.py delete mode 100644 model_zoo/vision/scaledyolov4/README.md delete mode 100644 model_zoo/vision/scaledyolov4/api.md delete mode 100644 model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/scaledyolov4/cpp/README.md delete mode 100644 model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc delete mode 100644 model_zoo/vision/scaledyolov4/scaled_yolov4.py delete mode 100644 model_zoo/vision/scrfd/README.md delete mode 100644 model_zoo/vision/scrfd/api.md delete mode 100644 model_zoo/vision/scrfd/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/scrfd/cpp/README.md delete mode 100644 model_zoo/vision/scrfd/cpp/scrfd.cc delete mode 100644 model_zoo/vision/scrfd/scrfd.py delete mode 100644 model_zoo/vision/ultraface/README.md delete mode 100644 model_zoo/vision/ultraface/api.md delete mode 100644 model_zoo/vision/ultraface/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/ultraface/cpp/README.md delete mode 100644 model_zoo/vision/ultraface/cpp/ultraface.cc delete mode 100644 model_zoo/vision/ultraface/ultraface.py delete mode 100644 model_zoo/vision/yolor/README.md delete mode 100644 model_zoo/vision/yolor/api.md delete mode 100644 model_zoo/vision/yolor/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolor/cpp/README.md delete mode 100644 model_zoo/vision/yolor/cpp/yolor.cc delete mode 100644 model_zoo/vision/yolor/yolor.py delete mode 100644 model_zoo/vision/yolov5/README.md delete mode 100644 model_zoo/vision/yolov5/api.md delete mode 100644 model_zoo/vision/yolov5/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolov5/cpp/README.md delete mode 100644 model_zoo/vision/yolov5/cpp/yolov5.cc delete mode 100644 model_zoo/vision/yolov5/yolov5.py delete mode 100644 model_zoo/vision/yolov5face/README.md delete mode 100644 model_zoo/vision/yolov5face/api.md delete mode 100644 model_zoo/vision/yolov5face/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolov5face/cpp/README.md delete mode 100644 model_zoo/vision/yolov5face/cpp/yolov5face.cc delete mode 100644 model_zoo/vision/yolov5face/yolov5face.py delete mode 100644 model_zoo/vision/yolov5lite/README.md delete mode 100644 model_zoo/vision/yolov5lite/api.md delete mode 100644 model_zoo/vision/yolov5lite/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolov5lite/cpp/README.md delete mode 100644 model_zoo/vision/yolov5lite/cpp/yolov5lite.cc delete mode 100644 model_zoo/vision/yolov5lite/yolov5lite.py delete mode 100644 model_zoo/vision/yolov6/README.md delete mode 100644 model_zoo/vision/yolov6/api.md delete mode 100644 model_zoo/vision/yolov6/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolov6/cpp/README.md delete mode 100644 model_zoo/vision/yolov6/cpp/yolov6.cc delete mode 100644 model_zoo/vision/yolov6/yolov6.py delete mode 100644 model_zoo/vision/yolov7/README.md delete mode 100644 model_zoo/vision/yolov7/api.md delete mode 100644 model_zoo/vision/yolov7/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolov7/cpp/README.md delete mode 100644 model_zoo/vision/yolov7/cpp/yolov7.cc delete mode 100644 model_zoo/vision/yolov7/yolov7.py delete mode 100644 model_zoo/vision/yolox/README.md delete mode 100644 model_zoo/vision/yolox/api.md delete mode 100644 model_zoo/vision/yolox/cpp/CMakeLists.txt delete mode 100644 model_zoo/vision/yolox/cpp/README.md delete mode 100644 model_zoo/vision/yolox/cpp/yolox.cc delete mode 100644 model_zoo/vision/yolox/yolox.py delete mode 100644 sdk_mannager/fastdeploy/__init__.py delete mode 100644 sdk_mannager/fastdeploy/__main__.py delete mode 100644 sdk_mannager/fastdeploy/download.py delete mode 100644 sdk_mannager/requirements.txt delete mode 100644 sdk_mannager/setup.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c62522c8..7e2621f6c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ option(CSRCS_DIR_NAME "Name of source code directory") option(LIBRARY_NAME "Name of build library name") option(PY_LIBRARY_NAME "Name of build python library name") if(NOT CSRCS_DIR_NAME) - set(CSRCS_DIR_NAME "csrcs") + set(CSRCS_DIR_NAME "csrc") endif() if(NOT LIBRARY_NAME) set(LIBRARY_NAME "fastdeploy") @@ -55,10 +55,6 @@ option(ENABLE_FDTENSOR_FUNC "Whether to compile with function of FDTensor." OFF) option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF) option(ENABLE_DEBUG "Whether to enable print debug information, this may reduce performance." OFF) -# Whether to build fastdeply with vision/text/... examples, only for testings. -option(WITH_VISION_EXAMPLES "Whether to build fastdeply with vision examples" OFF) -option(WITH_TEXT_EXAMPLES "Whether to build fastdeply with text examples" OFF) - # config GIT_URL with github mirrors to speed up dependent repos clone option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL}) if(NOT GIT_URL) @@ -102,19 +98,6 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}") include_directories(${HEAD_DIR}) include_directories(${CMAKE_CURRENT_BINARY_DIR}) -if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) - # ENABLE_VISION and ENABLE_VISION_VISUALIZE must be ON if enable vision examples. - message(STATUS "Found WTIH_VISION_EXAMPLES ON, so, force ENABLE_VISION and ENABLE_VISION_VISUALIZE ON") - set(ENABLE_VISION ON CACHE BOOL "force to enable vision models usage" FORCE) - set(ENABLE_VISION_VISUALIZE ON CACHE BOOL "force to enable visualize vision model result toolbox" FORCE) -endif() - -if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) - # ENABLE_TEXT must be ON if enable text examples. - message(STATUS "Found WITH_TEXT_EXAMPLES ON, so, force ENABLE_TEXT ON") - set(ENABLE_TEXT ON CACHE BOOL "force to enable text models usage" FORCE) -endif() - add_definitions(-DFASTDEPLOY_LIB) file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc) file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc) @@ -277,22 +260,6 @@ if(MSVC) endif() target_link_libraries(${LIBRARY_NAME} ${DEPEND_LIBS}) -# add examples after prepare include paths for third-parties -if (WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) - add_definitions(-DWITH_VISION_EXAMPLES) - set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin) - add_subdirectory(examples) -endif() - -if (WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples) - add_definitions(-DWITH_TEXT_EXAMPLES) - set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/examples/bin) - # Avoid to add_subdirectory repeatedly - if (NOT WITH_VISION_EXAMPLES) - add_subdirectory(examples) - endif() -endif() - if (WITH_TESTING AND EXISTS ${PROJECT_SOURCE_DIR}/tests) add_definitions(-DWITH_TESTING) include(external/gtest.cmake) diff --git a/csrcs/fastdeploy/CMakeLists.txt b/csrcs/fastdeploy/CMakeLists.txt deleted file mode 100644 index e69de29bb..000000000 diff --git a/csrcs/fastdeploy/backends/backend.h b/csrcs/fastdeploy/backends/backend.h deleted file mode 100644 index de7b5a575..000000000 --- a/csrcs/fastdeploy/backends/backend.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include -#include "fastdeploy/backends/common/multiclass_nms.h" -#include "fastdeploy/core/fd_tensor.h" - -namespace fastdeploy { - -struct TensorInfo { - std::string name; - std::vector shape; - FDDataType dtype; -}; - -class BaseBackend { - public: - bool initialized_ = false; - - BaseBackend() {} - virtual ~BaseBackend() = default; - - virtual bool Initialized() const { return initialized_; } - - virtual int NumInputs() const = 0; - virtual int NumOutputs() const = 0; - virtual TensorInfo GetInputInfo(int index) = 0; - virtual TensorInfo GetOutputInfo(int index) = 0; - virtual bool Infer(std::vector& inputs, - std::vector* outputs) = 0; -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/common/multiclass_nms.cc b/csrcs/fastdeploy/backends/common/multiclass_nms.cc deleted file mode 100644 index c3d65ec7d..000000000 --- a/csrcs/fastdeploy/backends/common/multiclass_nms.cc +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/backends/common/multiclass_nms.h" -#include -#include "fastdeploy/core/fd_tensor.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { -namespace backend { -template -bool SortScorePairDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -void GetMaxScoreIndex(const float* scores, const int& score_size, - const float& threshold, const int& top_k, - std::vector>* sorted_indices) { - for (size_t i = 0; i < score_size; ++i) { - if (scores[i] > threshold) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), sorted_indices->end(), - SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -float BBoxArea(const float* box, const bool& normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. xmax < xmin or ymax < ymin), return 0. - return 0.f; - } else { - const float w = box[2] - box[0]; - const float h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. - return (w + 1) * (h + 1); - } - } -} - -float JaccardOverlap(const float* box1, const float* box2, - const bool& normalized) { - if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || - box2[3] < box1[1]) { - return 0.f; - } else { - const float inter_xmin = std::max(box1[0], box2[0]); - const float inter_ymin = std::max(box1[1], box2[1]); - const float inter_xmax = std::min(box1[2], box2[2]); - const float inter_ymax = std::min(box1[3], box2[3]); - float norm = normalized ? 0.0f : 1.0f; - float inter_w = inter_xmax - inter_xmin + norm; - float inter_h = inter_ymax - inter_ymin + norm; - const float inter_area = inter_w * inter_h; - const float bbox1_area = BBoxArea(box1, normalized); - const float bbox2_area = BBoxArea(box2, normalized); - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - -void MultiClassNMS::FastNMS(const float* boxes, const float* scores, - const int& num_boxes, - std::vector* keep_indices) { - std::vector> sorted_indices; - GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k, - &sorted_indices); - - float adaptive_threshold = nms_threshold; - while (sorted_indices.size() != 0) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (size_t k = 0; k < keep_indices->size(); ++k) { - if (!keep) { - break; - } - const int kept_idx = (*keep_indices)[k]; - float overlap = - JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized); - keep = overlap <= adaptive_threshold; - } - if (keep) { - keep_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { - adaptive_threshold *= nms_eta; - } - } -} - -int MultiClassNMS::NMSForEachSample( - const float* boxes, const float* scores, int num_boxes, int num_classes, - std::map>* keep_indices) { - for (int i = 0; i < num_classes; ++i) { - if (i == background_label) { - continue; - } - const float* score_for_class_i = scores + i * num_boxes; - FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); - } - int num_det = 0; - for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { - num_det += iter->second.size(); - } - - if (keep_top_k > -1 && num_det > keep_top_k) { - std::vector>> score_index_pairs; - for (const auto& it : *keep_indices) { - int label = it.first; - const float* current_score = scores + label * num_boxes; - auto& label_indices = it.second; - for (size_t j = 0; j < label_indices.size(); ++j) { - int idx = label_indices[j]; - score_index_pairs.push_back( - std::make_pair(current_score[idx], std::make_pair(label, idx))); - } - } - std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), - SortScorePairDescend>); - score_index_pairs.resize(keep_top_k); - - std::map> new_indices; - for (size_t j = 0; j < score_index_pairs.size(); ++j) { - int label = score_index_pairs[j].second.first; - int idx = score_index_pairs[j].second.second; - new_indices[label].push_back(idx); - } - new_indices.swap(*keep_indices); - num_det = keep_top_k; - } - return num_det; -} - -void MultiClassNMS::Compute(const float* boxes_data, const float* scores_data, - const std::vector& boxes_dim, - const std::vector& scores_dim) { - int score_size = scores_dim.size(); - - int64_t batch_size = scores_dim[0]; - int64_t box_dim = boxes_dim[2]; - int64_t out_dim = box_dim + 2; - - int num_nmsed_out = 0; - FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " + - std::to_string(score_size) + "."); - FDASSERT(boxes_dim[2] == 4, - "Require the 3-dimension of input boxes be 4, but now it's " + - std::to_string(boxes_dim[2]) + "."); - out_num_rois_data.resize(batch_size); - - std::vector>> all_indices; - for (size_t i = 0; i < batch_size; ++i) { - std::map> indices; // indices kept for each class - const float* current_boxes_ptr = - boxes_data + i * boxes_dim[1] * boxes_dim[2]; - const float* current_scores_ptr = - scores_data + i * scores_dim[1] * scores_dim[2]; - int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr, - boxes_dim[1], scores_dim[1], &indices); - num_nmsed_out += num; - out_num_rois_data[i] = num; - all_indices.emplace_back(indices); - } - std::vector out_box_dims = {num_nmsed_out, 6}; - std::vector out_index_dims = {num_nmsed_out, 1}; - if (num_nmsed_out == 0) { - for (size_t i = 0; i < batch_size; ++i) { - out_num_rois_data[i] = 0; - } - return; - } - out_box_data.resize(num_nmsed_out * 6); - out_index_data.resize(num_nmsed_out); - - int count = 0; - for (size_t i = 0; i < batch_size; ++i) { - const float* current_boxes_ptr = - boxes_data + i * boxes_dim[1] * boxes_dim[2]; - const float* current_scores_ptr = - scores_data + i * scores_dim[1] * scores_dim[2]; - for (const auto& it : all_indices[i]) { - int label = it.first; - const auto& indices = it.second; - const float* current_scores_class_ptr = - current_scores_ptr + label * scores_dim[2]; - for (size_t j = 0; j < indices.size(); ++j) { - int start = count * 6; - out_box_data[start] = label; - out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; - - out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4]; - out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1]; - out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2]; - - out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3]; - out_index_data[count] = i * boxes_dim[1] + indices[j]; - count += 1; - } - } - } -} -} // namespace backend -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/common/multiclass_nms.h b/csrcs/fastdeploy/backends/common/multiclass_nms.h deleted file mode 100644 index 48a3d9336..000000000 --- a/csrcs/fastdeploy/backends/common/multiclass_nms.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include - -namespace fastdeploy { -namespace backend { -struct MultiClassNMS { - int64_t background_label = -1; - int64_t keep_top_k = -1; - float nms_eta; - float nms_threshold = 0.7; - int64_t nms_top_k; - bool normalized; - float score_threshold; - - std::vector out_num_rois_data; - std::vector out_index_data; - std::vector out_box_data; - void FastNMS(const float* boxes, const float* scores, const int& num_boxes, - std::vector* keep_indices); - int NMSForEachSample(const float* boxes, const float* scores, int num_boxes, - int num_classes, - std::map>* keep_indices); - void Compute(const float* boxes, const float* scores, - const std::vector& boxes_dim, - const std::vector& scores_dim); -}; -} // namespace backend - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc b/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc deleted file mode 100644 index a132dbffc..000000000 --- a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.cc +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef NON_64_PLATFORM - -#include "fastdeploy/backends/ort/ops/multiclass_nms.h" -#include -#include "fastdeploy/core/fd_tensor.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -struct OrtTensorDimensions : std::vector { - OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) { - OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value); - std::vector::operator=(ort.GetTensorShape(info)); - ort.ReleaseTensorTypeAndShapeInfo(info); - } -}; - -template -bool SortScorePairDescend(const std::pair& pair1, - const std::pair& pair2) { - return pair1.first > pair2.first; -} - -void GetMaxScoreIndex(const float* scores, const int& score_size, - const float& threshold, const int& top_k, - std::vector>* sorted_indices) { - for (size_t i = 0; i < score_size; ++i) { - if (scores[i] > threshold) { - sorted_indices->push_back(std::make_pair(scores[i], i)); - } - } - // Sort the score pair according to the scores in descending order - std::stable_sort(sorted_indices->begin(), sorted_indices->end(), - SortScorePairDescend); - // Keep top_k scores if needed. - if (top_k > -1 && top_k < static_cast(sorted_indices->size())) { - sorted_indices->resize(top_k); - } -} - -float BBoxArea(const float* box, const bool& normalized) { - if (box[2] < box[0] || box[3] < box[1]) { - // If coordinate values are is invalid - // (e.g. xmax < xmin or ymax < ymin), return 0. - return 0.f; - } else { - const float w = box[2] - box[0]; - const float h = box[3] - box[1]; - if (normalized) { - return w * h; - } else { - // If coordinate values are not within range [0, 1]. - return (w + 1) * (h + 1); - } - } -} - -float JaccardOverlap(const float* box1, const float* box2, - const bool& normalized) { - if (box2[0] > box1[2] || box2[2] < box1[0] || box2[1] > box1[3] || - box2[3] < box1[1]) { - return 0.f; - } else { - const float inter_xmin = std::max(box1[0], box2[0]); - const float inter_ymin = std::max(box1[1], box2[1]); - const float inter_xmax = std::min(box1[2], box2[2]); - const float inter_ymax = std::min(box1[3], box2[3]); - float norm = normalized ? 0.0f : 1.0f; - float inter_w = inter_xmax - inter_xmin + norm; - float inter_h = inter_ymax - inter_ymin + norm; - const float inter_area = inter_w * inter_h; - const float bbox1_area = BBoxArea(box1, normalized); - const float bbox2_area = BBoxArea(box2, normalized); - return inter_area / (bbox1_area + bbox2_area - inter_area); - } -} - -void MultiClassNmsKernel::FastNMS(const float* boxes, const float* scores, - const int& num_boxes, - std::vector* keep_indices) { - std::vector> sorted_indices; - GetMaxScoreIndex(scores, num_boxes, score_threshold, nms_top_k, - &sorted_indices); - - float adaptive_threshold = nms_threshold; - while (sorted_indices.size() != 0) { - const int idx = sorted_indices.front().second; - bool keep = true; - for (size_t k = 0; k < keep_indices->size(); ++k) { - if (!keep) { - break; - } - const int kept_idx = (*keep_indices)[k]; - float overlap = - JaccardOverlap(boxes + idx * 4, boxes + kept_idx * 4, normalized); - keep = overlap <= adaptive_threshold; - } - if (keep) { - keep_indices->push_back(idx); - } - sorted_indices.erase(sorted_indices.begin()); - if (keep && nms_eta<1.0 & adaptive_threshold> 0.5) { - adaptive_threshold *= nms_eta; - } - } -} - -int MultiClassNmsKernel::NMSForEachSample( - const float* boxes, const float* scores, int num_boxes, int num_classes, - std::map>* keep_indices) { - for (int i = 0; i < num_classes; ++i) { - if (i == background_label) { - continue; - } - const float* score_for_class_i = scores + i * num_boxes; - FastNMS(boxes, score_for_class_i, num_boxes, &((*keep_indices)[i])); - } - int num_det = 0; - for (auto iter = keep_indices->begin(); iter != keep_indices->end(); ++iter) { - num_det += iter->second.size(); - } - - if (keep_top_k > -1 && num_det > keep_top_k) { - std::vector>> score_index_pairs; - for (const auto& it : *keep_indices) { - int label = it.first; - const float* current_score = scores + label * num_boxes; - auto& label_indices = it.second; - for (size_t j = 0; j < label_indices.size(); ++j) { - int idx = label_indices[j]; - score_index_pairs.push_back( - std::make_pair(current_score[idx], std::make_pair(label, idx))); - } - } - std::stable_sort(score_index_pairs.begin(), score_index_pairs.end(), - SortScorePairDescend>); - score_index_pairs.resize(keep_top_k); - - std::map> new_indices; - for (size_t j = 0; j < score_index_pairs.size(); ++j) { - int label = score_index_pairs[j].second.first; - int idx = score_index_pairs[j].second.second; - new_indices[label].push_back(idx); - } - new_indices.swap(*keep_indices); - num_det = keep_top_k; - } - return num_det; -} - -void MultiClassNmsKernel::Compute(OrtKernelContext* context) { - const OrtValue* boxes = ort_.KernelContext_GetInput(context, 0); - const OrtValue* scores = ort_.KernelContext_GetInput(context, 1); - const float* boxes_data = - reinterpret_cast(ort_.GetTensorData(boxes)); - const float* scores_data = - reinterpret_cast(ort_.GetTensorData(scores)); - OrtTensorDimensions boxes_dim(ort_, boxes); - OrtTensorDimensions scores_dim(ort_, scores); - int score_size = scores_dim.size(); - - int64_t batch_size = scores_dim[0]; - int64_t box_dim = boxes_dim[2]; - int64_t out_dim = box_dim + 2; - - int num_nmsed_out = 0; - FDASSERT(score_size == 3, "Require rank of input scores be 3, but now it's " + - std::to_string(score_size) + "."); - FDASSERT(boxes_dim[2] == 4, - "Require the 3-dimension of input boxes be 4, but now it's " + - std::to_string(boxes_dim[2]) + "."); - std::vector out_num_rois_dims = {batch_size}; - OrtValue* out_num_rois = ort_.KernelContext_GetOutput( - context, 2, out_num_rois_dims.data(), out_num_rois_dims.size()); - int32_t* out_num_rois_data = ort_.GetTensorMutableData(out_num_rois); - - std::vector>> all_indices; - for (size_t i = 0; i < batch_size; ++i) { - std::map> indices; // indices kept for each class - const float* current_boxes_ptr = - boxes_data + i * boxes_dim[1] * boxes_dim[2]; - const float* current_scores_ptr = - scores_data + i * scores_dim[1] * scores_dim[2]; - int num = NMSForEachSample(current_boxes_ptr, current_scores_ptr, - boxes_dim[1], scores_dim[1], &indices); - num_nmsed_out += num; - out_num_rois_data[i] = num; - all_indices.emplace_back(indices); - } - std::vector out_box_dims = {num_nmsed_out, 6}; - std::vector out_index_dims = {num_nmsed_out, 1}; - OrtValue* out_box = ort_.KernelContext_GetOutput( - context, 0, out_box_dims.data(), out_box_dims.size()); - OrtValue* out_index = ort_.KernelContext_GetOutput( - context, 1, out_index_dims.data(), out_index_dims.size()); - if (num_nmsed_out == 0) { - int32_t* out_num_rois_data = - ort_.GetTensorMutableData(out_num_rois); - for (size_t i = 0; i < batch_size; ++i) { - out_num_rois_data[i] = 0; - } - return; - } - float* out_box_data = ort_.GetTensorMutableData(out_box); - int32_t* out_index_data = ort_.GetTensorMutableData(out_index); - - int count = 0; - for (size_t i = 0; i < batch_size; ++i) { - const float* current_boxes_ptr = - boxes_data + i * boxes_dim[1] * boxes_dim[2]; - const float* current_scores_ptr = - scores_data + i * scores_dim[1] * scores_dim[2]; - for (const auto& it : all_indices[i]) { - int label = it.first; - const auto& indices = it.second; - const float* current_scores_class_ptr = - current_scores_ptr + label * scores_dim[2]; - for (size_t j = 0; j < indices.size(); ++j) { - int start = count * 6; - out_box_data[start] = label; - out_box_data[start + 1] = current_scores_class_ptr[indices[j]]; - - out_box_data[start + 2] = current_boxes_ptr[indices[j] * 4]; - out_box_data[start + 3] = current_boxes_ptr[indices[j] * 4 + 1]; - out_box_data[start + 4] = current_boxes_ptr[indices[j] * 4 + 2]; - - out_box_data[start + 5] = current_boxes_ptr[indices[j] * 4 + 3]; - out_index_data[count] = i * boxes_dim[1] + indices[j]; - count += 1; - } - } - } -} - -void MultiClassNmsKernel::GetAttribute(const OrtKernelInfo* info) { - background_label = - ort_.KernelInfoGetAttribute(info, "background_label"); - keep_top_k = ort_.KernelInfoGetAttribute(info, "keep_top_k"); - nms_eta = ort_.KernelInfoGetAttribute(info, "nms_eta"); - nms_threshold = ort_.KernelInfoGetAttribute(info, "nms_threshold"); - nms_top_k = ort_.KernelInfoGetAttribute(info, "nms_top_k"); - normalized = ort_.KernelInfoGetAttribute(info, "normalized"); - score_threshold = ort_.KernelInfoGetAttribute(info, "score_threshold"); -} -} // namespace fastdeploy - -#endif \ No newline at end of file diff --git a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h b/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h deleted file mode 100644 index 4e167d669..000000000 --- a/csrcs/fastdeploy/backends/ort/ops/multiclass_nms.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#ifndef NON_64_PLATFORM -#include "onnxruntime_cxx_api.h" // NOLINT - -namespace fastdeploy { - -struct MultiClassNmsKernel { - protected: - int64_t background_label = -1; - int64_t keep_top_k = -1; - float nms_eta; - float nms_threshold = 0.7; - int64_t nms_top_k; - bool normalized; - float score_threshold; - Ort::CustomOpApi ort_; - - public: - MultiClassNmsKernel(Ort::CustomOpApi ort, const OrtKernelInfo* info) - : ort_(ort) { - GetAttribute(info); - } - - void GetAttribute(const OrtKernelInfo* info); - - void Compute(OrtKernelContext* context); - void FastNMS(const float* boxes, const float* scores, const int& num_boxes, - std::vector* keep_indices); - int NMSForEachSample(const float* boxes, const float* scores, int num_boxes, - int num_classes, - std::map>* keep_indices); -}; - -struct MultiClassNmsOp - : Ort::CustomOpBase { - void* CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo* info) const { - return new MultiClassNmsKernel(api, info); - } - - const char* GetName() const { return "MultiClassNMS"; } - - size_t GetInputTypeCount() const { return 2; } - - ONNXTensorElementDataType GetInputType(size_t index) const { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - } - - size_t GetOutputTypeCount() const { return 3; } - - ONNXTensorElementDataType GetOutputType(size_t index) const { - if (index == 0) { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - } - return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; - } - - const char* GetExecutionProviderType() const { - return "CPUExecutionProvider"; - } -}; - -} // namespace fastdeploy - -#endif \ No newline at end of file diff --git a/csrcs/fastdeploy/backends/ort/ort_backend.cc b/csrcs/fastdeploy/backends/ort/ort_backend.cc deleted file mode 100644 index c17890109..000000000 --- a/csrcs/fastdeploy/backends/ort/ort_backend.cc +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/backends/ort/ort_backend.h" -#include -#include "fastdeploy/backends/ort/ops/multiclass_nms.h" -#include "fastdeploy/backends/ort/utils.h" -#include "fastdeploy/utils/utils.h" -#ifdef ENABLE_PADDLE_FRONTEND -#include "paddle2onnx/converter.h" -#endif - -namespace fastdeploy { - -std::vector OrtBackend::custom_operators_ = - std::vector(); - -void OrtBackend::BuildOption(const OrtBackendOption& option) { - option_ = option; - if (option.graph_optimization_level >= 0) { - session_options_.SetGraphOptimizationLevel( - GraphOptimizationLevel(option.graph_optimization_level)); - } - if (option.intra_op_num_threads >= 0) { - session_options_.SetIntraOpNumThreads(option.intra_op_num_threads); - } - if (option.inter_op_num_threads >= 0) { - session_options_.SetInterOpNumThreads(option.inter_op_num_threads); - } - if (option.execution_mode >= 0) { - session_options_.SetExecutionMode(ExecutionMode(option.execution_mode)); - } - if (option.use_gpu) { - auto all_providers = Ort::GetAvailableProviders(); - bool support_cuda = false; - std::string providers_msg = ""; - for (size_t i = 0; i < all_providers.size(); ++i) { - providers_msg = providers_msg + all_providers[i] + ", "; - if (all_providers[i] == "CUDAExecutionProvider") { - support_cuda = true; - } - } - if (!support_cuda) { - FDWARNING << "Compiled fastdeploy with onnxruntime doesn't " - "support GPU, the available providers are " - << providers_msg << "will fallback to CPUExecutionProvider." - << std::endl; - option_.use_gpu = false; - } else { - FDASSERT(option.gpu_id == 0, "Requires gpu_id == 0, but now gpu_id = " + - std::to_string(option.gpu_id) + "."); - OrtCUDAProviderOptions cuda_options; - cuda_options.device_id = option.gpu_id; - session_options_.AppendExecutionProvider_CUDA(cuda_options); - } - } -} - -bool OrtBackend::InitFromPaddle(const std::string& model_file, - const std::string& params_file, - const OrtBackendOption& option, bool verbose) { - if (initialized_) { - FDERROR << "OrtBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } -#ifdef ENABLE_PADDLE_FRONTEND - char* model_content_ptr; - int model_content_size = 0; - - std::vector custom_ops; - for (auto& item : option.custom_op_info_) { - paddle2onnx::CustomOp op; - strcpy(op.op_name, item.first.c_str()); - strcpy(op.export_op_name, item.second.c_str()); - custom_ops.emplace_back(op); - } - if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), - &model_content_ptr, &model_content_size, 11, true, - verbose, true, true, true, custom_ops.data(), - custom_ops.size())) { - FDERROR << "Error occured while export PaddlePaddle to ONNX format." - << std::endl; - return false; - } - - std::string onnx_model_proto(model_content_ptr, - model_content_ptr + model_content_size); - delete[] model_content_ptr; - model_content_ptr = nullptr; - return InitFromOnnx(onnx_model_proto, option, true); -#else - FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to " - "call `InitFromOnnx` instead." - << std::endl; -#endif - return false; -} - -bool OrtBackend::InitFromOnnx(const std::string& model_file, - const OrtBackendOption& option, - bool from_memory_buffer) { - if (initialized_) { - FDERROR << "OrtBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - - BuildOption(option); - InitCustomOperators(); - if (from_memory_buffer) { - session_ = {env_, model_file.data(), model_file.size(), session_options_}; - } else { -#ifdef _WIN32 - session_ = {env_, - std::wstring(model_file.begin(), model_file.end()).c_str(), - session_options_}; -#else - session_ = {env_, model_file.c_str(), session_options_}; -#endif - } - binding_ = std::make_shared(session_); - - Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); - Ort::Allocator allocator(session_, memory_info); - size_t n_inputs = session_.GetInputCount(); - for (size_t i = 0; i < n_inputs; ++i) { - auto input_name = session_.GetInputName(i, allocator); - auto type_info = session_.GetInputTypeInfo(i); - std::vector shape = - type_info.GetTensorTypeAndShapeInfo().GetShape(); - ONNXTensorElementDataType data_type = - type_info.GetTensorTypeAndShapeInfo().GetElementType(); - inputs_desc_.emplace_back(OrtValueInfo{input_name, shape, data_type}); - allocator.Free(input_name); - } - - size_t n_outputs = session_.GetOutputCount(); - for (size_t i = 0; i < n_outputs; ++i) { - auto output_name = session_.GetOutputName(i, allocator); - auto type_info = session_.GetOutputTypeInfo(i); - std::vector shape = - type_info.GetTensorTypeAndShapeInfo().GetShape(); - ONNXTensorElementDataType data_type = - type_info.GetTensorTypeAndShapeInfo().GetElementType(); - outputs_desc_.emplace_back(OrtValueInfo{output_name, shape, data_type}); - - Ort::MemoryInfo out_memory_info("Cpu", OrtDeviceAllocator, 0, - OrtMemTypeDefault); - binding_->BindOutput(output_name, out_memory_info); - - allocator.Free(output_name); - } - initialized_ = true; - return true; -} - -void OrtBackend::CopyToCpu(const Ort::Value& value, FDTensor* tensor) { - const auto info = value.GetTensorTypeAndShapeInfo(); - const auto data_type = info.GetElementType(); - size_t numel = info.GetElementCount(); - tensor->shape = info.GetShape(); - - if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { - tensor->data.resize(numel * sizeof(float)); - memcpy(static_cast(tensor->Data()), value.GetTensorData(), - numel * sizeof(float)); - tensor->dtype = FDDataType::FP32; - } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { - tensor->data.resize(numel * sizeof(int32_t)); - memcpy(static_cast(tensor->Data()), value.GetTensorData(), - numel * sizeof(int32_t)); - tensor->dtype = FDDataType::INT32; - } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { - tensor->data.resize(numel * sizeof(int64_t)); - memcpy(static_cast(tensor->Data()), value.GetTensorData(), - numel * sizeof(int64_t)); - tensor->dtype = FDDataType::INT64; - } else if (data_type == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { - tensor->data.resize(numel * sizeof(double)); - memcpy(static_cast(tensor->Data()), value.GetTensorData(), - numel * sizeof(double)); - tensor->dtype = FDDataType::FP64; - } else { - FDASSERT(false, "Unrecognized data type of " + std::to_string(data_type) + - " while calling OrtBackend::CopyToCpu()."); - } -} - -bool OrtBackend::Infer(std::vector& inputs, - std::vector* outputs) { - if (inputs.size() != inputs_desc_.size()) { - FDERROR << "[OrtBackend] Size of the inputs(" << inputs.size() - << ") should keep same with the inputs of this model(" - << inputs_desc_.size() << ")." << std::endl; - return false; - } - - // from FDTensor to Ort Inputs - for (size_t i = 0; i < inputs.size(); ++i) { - auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu); - binding_->BindInput(inputs[i].name.c_str(), ort_value); - } - - for (size_t i = 0; i < outputs_desc_.size(); ++i) { - Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, - OrtMemTypeDefault); - binding_->BindOutput(outputs_desc_[i].name.c_str(), memory_info); - } - - // Inference with inputs - try { - session_.Run({}, *(binding_.get())); - } catch (const std::exception& e) { - FDERROR << "Failed to Infer: " << e.what() << std::endl; - return false; - } - - // Copy result after inference - std::vector ort_outputs = binding_->GetOutputValues(); - outputs->resize(ort_outputs.size()); - for (size_t i = 0; i < ort_outputs.size(); ++i) { - (*outputs)[i].name = outputs_desc_[i].name; - CopyToCpu(ort_outputs[i], &((*outputs)[i])); - } - - return true; -} - -TensorInfo OrtBackend::GetInputInfo(int index) { - FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) + - " should less than the number of inputs:" + - std::to_string(NumInputs()) + "."); - TensorInfo info; - info.name = inputs_desc_[index].name; - info.shape.assign(inputs_desc_[index].shape.begin(), - inputs_desc_[index].shape.end()); - info.dtype = GetFdDtype(inputs_desc_[index].dtype); - return info; -} - -TensorInfo OrtBackend::GetOutputInfo(int index) { - FDASSERT(index < NumOutputs(), - "The index:" + std::to_string(index) + - " should less than the number of outputs:" + - std::to_string(NumOutputs()) + "."); - TensorInfo info; - info.name = outputs_desc_[index].name; - info.shape.assign(outputs_desc_[index].shape.begin(), - outputs_desc_[index].shape.end()); - info.dtype = GetFdDtype(outputs_desc_[index].dtype); - return info; -} - -void OrtBackend::InitCustomOperators() { -#ifndef NON_64_PLATFORM - if (custom_operators_.size() == 0) { - MultiClassNmsOp* custom_op = new MultiClassNmsOp{}; - custom_operators_.push_back(custom_op); - } - for (size_t i = 0; i < custom_operators_.size(); ++i) { - custom_op_domain_.Add(custom_operators_[i]); - } - session_options_.Add(custom_op_domain_); -#endif -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/ort/ort_backend.h b/csrcs/fastdeploy/backends/ort/ort_backend.h deleted file mode 100644 index 5070934c6..000000000 --- a/csrcs/fastdeploy/backends/ort/ort_backend.h +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "fastdeploy/backends/backend.h" -#include "onnxruntime_cxx_api.h" // NOLINT - -namespace fastdeploy { - -struct OrtValueInfo { - std::string name; - std::vector shape; - ONNXTensorElementDataType dtype; -}; - -struct OrtBackendOption { - // -1 means default - // 0: ORT_DISABLE_ALL - // 1: ORT_ENABLE_BASIC - // 2: ORT_ENABLE_EXTENDED - // 99: ORT_ENABLE_ALL (enable some custom optimizations e.g bert) - int graph_optimization_level = -1; - int intra_op_num_threads = -1; - int inter_op_num_threads = -1; - // 0: ORT_SEQUENTIAL - // 1: ORT_PARALLEL - int execution_mode = -1; - bool use_gpu = false; - int gpu_id = 0; - - // inside parameter, maybe remove next version - bool remove_multiclass_nms_ = false; - std::map custom_op_info_; -}; - -class OrtBackend : public BaseBackend { - public: - OrtBackend() {} - virtual ~OrtBackend() = default; - - void BuildOption(const OrtBackendOption& option); - - bool InitFromPaddle(const std::string& model_file, - const std::string& params_file, - const OrtBackendOption& option = OrtBackendOption(), - bool verbose = false); - - bool InitFromOnnx(const std::string& model_file, - const OrtBackendOption& option = OrtBackendOption(), - bool from_memory_buffer = false); - - bool Infer(std::vector& inputs, std::vector* outputs); - - int NumInputs() const { return inputs_desc_.size(); } - - int NumOutputs() const { return outputs_desc_.size(); } - - TensorInfo GetInputInfo(int index); - TensorInfo GetOutputInfo(int index); - static std::vector custom_operators_; - void InitCustomOperators(); - - private: - Ort::Env env_; - Ort::Session session_{nullptr}; - Ort::SessionOptions session_options_; - std::shared_ptr binding_; - std::vector inputs_desc_; - std::vector outputs_desc_; -#ifndef NON_64_PLATFORM - Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle"); -#endif - OrtBackendOption option_; - void CopyToCpu(const Ort::Value& value, FDTensor* tensor); -}; -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/ort/utils.cc b/csrcs/fastdeploy/backends/ort/utils.cc deleted file mode 100644 index ae3e45b86..000000000 --- a/csrcs/fastdeploy/backends/ort/utils.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/backends/ort/utils.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype) { - if (fd_dtype == FDDataType::FP32) { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT; - } else if (fd_dtype == FDDataType::FP64) { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE; - } else if (fd_dtype == FDDataType::INT32) { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32; - } else if (fd_dtype == FDDataType::INT64) { - return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64; - } - FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "." - << std::endl; - return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED; -} - -FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype) { - if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT) { - return FDDataType::FP32; - } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE) { - return FDDataType::FP64; - } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) { - return FDDataType::INT32; - } else if (ort_dtype == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64) { - return FDDataType::INT64; - } - FDERROR << "Unrecognized ort data type:" << ort_dtype << "." << std::endl; - return FDDataType::FP32; -} - -Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda) { - FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU, - "Only support tensor which device is CPU or GPU for OrtBackend."); - if (tensor.device == Device::GPU && is_backend_cuda) { - Ort::MemoryInfo memory_info("Cuda", OrtDeviceAllocator, 0, - OrtMemTypeDefault); - auto ort_value = Ort::Value::CreateTensor( - memory_info, tensor.MutableData(), tensor.Nbytes(), tensor.shape.data(), - tensor.shape.size(), GetOrtDtype(tensor.dtype)); - return ort_value; - } - Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault); - auto ort_value = Ort::Value::CreateTensor( - memory_info, tensor.Data(), tensor.Nbytes(), tensor.shape.data(), - tensor.shape.size(), GetOrtDtype(tensor.dtype)); - return ort_value; -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/ort/utils.h b/csrcs/fastdeploy/backends/ort/utils.h deleted file mode 100644 index e2912ad38..000000000 --- a/csrcs/fastdeploy/backends/ort/utils.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "fastdeploy/backends/backend.h" -#include "onnxruntime_cxx_api.h" // NOLINT - -namespace fastdeploy { - -// Convert FDDataType to OrtDataType -ONNXTensorElementDataType GetOrtDtype(const FDDataType& fd_dtype); - -// Convert OrtDataType to FDDataType -FDDataType GetFdDtype(const ONNXTensorElementDataType& ort_dtype); - -// Create Ort::Value -// is_backend_cuda specify if the onnxruntime use CUDAExectionProvider -// While is_backend_cuda = true, and tensor.device = Device::GPU -// Will directly share the cuda data in tensor to OrtValue -Ort::Value CreateOrtValue(FDTensor& tensor, bool is_backend_cuda = false); - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/paddle/paddle_backend.cc b/csrcs/fastdeploy/backends/paddle/paddle_backend.cc deleted file mode 100644 index 2fae38937..000000000 --- a/csrcs/fastdeploy/backends/paddle/paddle_backend.cc +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/backends/paddle/paddle_backend.h" - -namespace fastdeploy { - -void PaddleBackend::BuildOption(const PaddleBackendOption& option) { - if (option.use_gpu) { - config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id); - } else { - config_.DisableGpu(); - if (option.enable_mkldnn) { - config_.EnableMKLDNN(); - config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size); - } - } - config_.SetCpuMathLibraryNumThreads(option.cpu_thread_num); -} - -bool PaddleBackend::InitFromPaddle(const std::string& model_file, - const std::string& params_file, - const PaddleBackendOption& option) { - if (initialized_) { - FDERROR << "PaddleBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - config_.SetModel(model_file, params_file); - BuildOption(option); - predictor_ = paddle_infer::CreatePredictor(config_); - std::vector input_names = predictor_->GetInputNames(); - std::vector output_names = predictor_->GetOutputNames(); - for (size_t i = 0; i < input_names.size(); ++i) { - auto handle = predictor_->GetInputHandle(input_names[i]); - TensorInfo info; - auto shape = handle->shape(); - info.shape.assign(shape.begin(), shape.end()); - info.dtype = PaddleDataTypeToFD(handle->type()); - info.name = input_names[i]; - inputs_desc_.emplace_back(info); - } - for (size_t i = 0; i < output_names.size(); ++i) { - auto handle = predictor_->GetOutputHandle(output_names[i]); - TensorInfo info; - auto shape = handle->shape(); - info.shape.assign(shape.begin(), shape.end()); - info.dtype = PaddleDataTypeToFD(handle->type()); - info.name = output_names[i]; - outputs_desc_.emplace_back(info); - } - initialized_ = true; - return true; -} - -TensorInfo PaddleBackend::GetInputInfo(int index) { - FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) + - " should less than the number of inputs:" + - std::to_string(NumInputs()) + "."); - return inputs_desc_[index]; -} - -TensorInfo PaddleBackend::GetOutputInfo(int index) { - FDASSERT(index < NumOutputs(), - "The index:" + std::to_string(index) + - " should less than the number of outputs:" + - std::to_string(NumOutputs()) + "."); - return outputs_desc_[index]; -} - -bool PaddleBackend::Infer(std::vector& inputs, - std::vector* outputs) { - if (inputs.size() != inputs_desc_.size()) { - FDERROR << "[PaddleBackend] Size of inputs(" << inputs.size() - << ") should keep same with the inputs of this model(" - << inputs_desc_.size() << ")." << std::endl; - return false; - } - - for (size_t i = 0; i < inputs.size(); ++i) { - auto handle = predictor_->GetInputHandle(inputs[i].name); - ShareTensorFromCpu(handle.get(), inputs[i]); - } - - predictor_->Run(); - outputs->resize(outputs_desc_.size()); - for (size_t i = 0; i < outputs_desc_.size(); ++i) { - auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name); - CopyTensorToCpu(handle, &((*outputs)[i])); - } - return true; -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/paddle/paddle_backend.h b/csrcs/fastdeploy/backends/paddle/paddle_backend.h deleted file mode 100644 index 99ca5eb1b..000000000 --- a/csrcs/fastdeploy/backends/paddle/paddle_backend.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "fastdeploy/backends/backend.h" -#include "paddle_inference_api.h" // NOLINT - -namespace fastdeploy { - -struct PaddleBackendOption { -#ifdef WITH_GPU - bool use_gpu = true; -#else - bool use_gpu = false; -#endif - bool enable_mkldnn = true; - - int mkldnn_cache_size = 1; - int cpu_thread_num = 8; - // initialize memory size(MB) for GPU - int gpu_mem_init_size = 100; - // gpu device id - int gpu_id = 0; -}; - -// Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor -void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor); - -// Copy memory data from paddle_infer::Tensor to fastdeploy::FDTensor -void CopyTensorToCpu(std::unique_ptr& tensor, - FDTensor* fd_tensor); - -// Convert data type from paddle inference to fastdeploy -FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype); - -class PaddleBackend : public BaseBackend { - public: - PaddleBackend() {} - virtual ~PaddleBackend() = default; - void BuildOption(const PaddleBackendOption& option); - - bool InitFromPaddle( - const std::string& model_file, const std::string& params_file, - const PaddleBackendOption& option = PaddleBackendOption()); - - bool Infer(std::vector& inputs, std::vector* outputs); - - int NumInputs() const { return inputs_desc_.size(); } - - int NumOutputs() const { return outputs_desc_.size(); } - - TensorInfo GetInputInfo(int index); - TensorInfo GetOutputInfo(int index); - - private: - paddle_infer::Config config_; - std::shared_ptr predictor_; - std::vector inputs_desc_; - std::vector outputs_desc_; -}; -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/paddle/util.cc b/csrcs/fastdeploy/backends/paddle/util.cc deleted file mode 100644 index 1ae5b3553..000000000 --- a/csrcs/fastdeploy/backends/paddle/util.cc +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/backends/paddle/paddle_backend.h" - -namespace fastdeploy { -void ShareTensorFromCpu(paddle_infer::Tensor* tensor, FDTensor& fd_tensor) { - std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); - tensor->Reshape(shape); - if (fd_tensor.dtype == FDDataType::FP32) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), - shape, paddle_infer::PlaceType::kCPU); - return; - } else if (fd_tensor.dtype == FDDataType::INT32) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), - shape, paddle_infer::PlaceType::kCPU); - return; - } else if (fd_tensor.dtype == FDDataType::INT64) { - tensor->ShareExternalData(static_cast(fd_tensor.Data()), - shape, paddle_infer::PlaceType::kCPU); - return; - } - FDASSERT(false, "Unexpected data type(" + Str(fd_tensor.dtype) + - ") while infer with PaddleBackend."); -} - -void CopyTensorToCpu(std::unique_ptr& tensor, - FDTensor* fd_tensor) { - auto fd_dtype = PaddleDataTypeToFD(tensor->type()); - std::vector shape; - auto tmp_shape = tensor->shape(); - shape.assign(tmp_shape.begin(), tmp_shape.end()); - fd_tensor->Allocate(shape, fd_dtype, tensor->name()); - if (fd_tensor->dtype == FDDataType::FP32) { - tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); - return; - } else if (fd_tensor->dtype == FDDataType::INT32) { - tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); - return; - } else if (fd_tensor->dtype == FDDataType::INT64) { - tensor->CopyToCpu(static_cast(fd_tensor->MutableData())); - return; - } - FDASSERT(false, "Unexpected data type(" + Str(fd_tensor->dtype) + - ") while infer with PaddleBackend."); -} - -FDDataType PaddleDataTypeToFD(const paddle_infer::DataType& dtype) { - auto fd_dtype = FDDataType::FP32; - if (dtype == paddle_infer::FLOAT32) { - fd_dtype = FDDataType::FP32; - } else if (dtype == paddle_infer::INT64) { - fd_dtype = FDDataType::INT64; - } else if (dtype == paddle_infer::INT32) { - fd_dtype = FDDataType::INT32; - } else if (dtype == paddle_infer::UINT8) { - fd_dtype = FDDataType::UINT8; - } else { - FDASSERT(false, "Unexpected data type:" + std::to_string(int(dtype)) + - " while call CopyTensorToCpu in PaddleBackend."); - } - return fd_dtype; -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h b/csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h deleted file mode 100644 index 2484ccc68..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/BatchStream.h +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef BATCH_STREAM_H -#define BATCH_STREAM_H - -#include "NvInfer.h" -#include "common.h" -#include -#include -#include - -class IBatchStream { - public: - virtual void reset(int firstBatch) = 0; - virtual bool next() = 0; - virtual void skip(int skipCount) = 0; - virtual float* getBatch() = 0; - virtual float* getLabels() = 0; - virtual int getBatchesRead() const = 0; - virtual int getBatchSize() const = 0; - virtual nvinfer1::Dims getDims() const = 0; -}; - -class MNISTBatchStream : public IBatchStream { - public: - MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile, - const std::string& labelsFile, - const std::vector& directories) - : mBatchSize{batchSize}, mMaxBatches{maxBatches}, mDims{3, {1, 28, 28}} - //!< We already know the dimensions of MNIST images. - { - readDataFile(locateFile(dataFile, directories)); - readLabelsFile(locateFile(labelsFile, directories)); - } - - void reset(int firstBatch) override { mBatchCount = firstBatch; } - - bool next() override { - if (mBatchCount >= mMaxBatches) { - return false; - } - ++mBatchCount; - return true; - } - - void skip(int skipCount) override { mBatchCount += skipCount; } - - float* getBatch() override { - return mData.data() + - (mBatchCount * mBatchSize * samplesCommon::volume(mDims)); - } - - float* getLabels() override { - return mLabels.data() + (mBatchCount * mBatchSize); - } - - int getBatchesRead() const override { return mBatchCount; } - - int getBatchSize() const override { return mBatchSize; } - - nvinfer1::Dims getDims() const override { - return Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}}; - } - - private: - void readDataFile(const std::string& dataFilePath) { - std::ifstream file{dataFilePath.c_str(), std::ios::binary}; - - int magicNumber, numImages, imageH, imageW; - file.read(reinterpret_cast(&magicNumber), sizeof(magicNumber)); - // All values in the MNIST files are big endian. - magicNumber = samplesCommon::swapEndianness(magicNumber); - ASSERT(magicNumber == 2051 && - "Magic Number does not match the expected value for an MNIST image " - "set"); - - // Read number of images and dimensions - file.read(reinterpret_cast(&numImages), sizeof(numImages)); - file.read(reinterpret_cast(&imageH), sizeof(imageH)); - file.read(reinterpret_cast(&imageW), sizeof(imageW)); - - numImages = samplesCommon::swapEndianness(numImages); - imageH = samplesCommon::swapEndianness(imageH); - imageW = samplesCommon::swapEndianness(imageW); - - // The MNIST data is made up of unsigned bytes, so we need to cast to float - // and normalize. - int numElements = numImages * imageH * imageW; - std::vector rawData(numElements); - file.read(reinterpret_cast(rawData.data()), - numElements * sizeof(uint8_t)); - mData.resize(numElements); - std::transform(rawData.begin(), rawData.end(), mData.begin(), - [](uint8_t val) { return static_cast(val) / 255.f; }); - } - - void readLabelsFile(const std::string& labelsFilePath) { - std::ifstream file{labelsFilePath.c_str(), std::ios::binary}; - int magicNumber, numImages; - file.read(reinterpret_cast(&magicNumber), sizeof(magicNumber)); - // All values in the MNIST files are big endian. - magicNumber = samplesCommon::swapEndianness(magicNumber); - ASSERT(magicNumber == 2049 && - "Magic Number does not match the expected value for an MNIST labels " - "file"); - - file.read(reinterpret_cast(&numImages), sizeof(numImages)); - numImages = samplesCommon::swapEndianness(numImages); - - std::vector rawLabels(numImages); - file.read(reinterpret_cast(rawLabels.data()), - numImages * sizeof(uint8_t)); - mLabels.resize(numImages); - std::transform(rawLabels.begin(), rawLabels.end(), mLabels.begin(), - [](uint8_t val) { return static_cast(val); }); - } - - int mBatchSize{0}; - int mBatchCount{ - 0}; //!< The batch that will be read on the next invocation of next() - int mMaxBatches{0}; - Dims mDims{}; - std::vector mData{}; - std::vector mLabels{}; -}; - -class BatchStream : public IBatchStream { - public: - BatchStream(int batchSize, int maxBatches, std::string prefix, - std::string suffix, std::vector directories) - : mBatchSize(batchSize), mMaxBatches(maxBatches), mPrefix(prefix), - mSuffix(suffix), mDataDir(directories) { - FILE* file = fopen( - locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), - "rb"); - ASSERT(file != nullptr); - int d[4]; - size_t readSize = fread(d, sizeof(int), 4, file); - ASSERT(readSize == 4); - mDims.nbDims = 4; // The number of dimensions. - mDims.d[0] = d[0]; // Batch Size - mDims.d[1] = d[1]; // Channels - mDims.d[2] = d[2]; // Height - mDims.d[3] = d[3]; // Width - ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && - mDims.d[3] > 0); - fclose(file); - - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; - mBatch.resize(mBatchSize * mImageSize, 0); - mLabels.resize(mBatchSize, 0); - mFileBatch.resize(mDims.d[0] * mImageSize, 0); - mFileLabels.resize(mDims.d[0], 0); - reset(0); - } - - BatchStream(int batchSize, int maxBatches, std::string prefix, - std::vector directories) - : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) {} - - BatchStream(int batchSize, int maxBatches, nvinfer1::Dims dims, - std::string listFile, std::vector directories) - : mBatchSize(batchSize), mMaxBatches(maxBatches), mDims(dims), - mListFile(listFile), mDataDir(directories) { - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; - mBatch.resize(mBatchSize * mImageSize, 0); - mLabels.resize(mBatchSize, 0); - mFileBatch.resize(mDims.d[0] * mImageSize, 0); - mFileLabels.resize(mDims.d[0], 0); - reset(0); - } - - // Resets data members - void reset(int firstBatch) override { - mBatchCount = 0; - mFileCount = 0; - mFileBatchPos = mDims.d[0]; - skip(firstBatch); - } - - // Advance to next batch and return true, or return false if there is no batch - // left. - bool next() override { - if (mBatchCount == mMaxBatches) { - return false; - } - - for (int csize = 1, batchPos = 0; batchPos < mBatchSize; - batchPos += csize, mFileBatchPos += csize) { - ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); - if (mFileBatchPos == mDims.d[0] && !update()) { - return false; - } - - // copy the smaller of: elements left to fulfill the request, or elements - // left in the file buffer. - csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); - std::copy_n(getFileBatch() + mFileBatchPos * mImageSize, - csize * mImageSize, getBatch() + batchPos * mImageSize); - std::copy_n(getFileLabels() + mFileBatchPos, csize, - getLabels() + batchPos); - } - mBatchCount++; - return true; - } - - // Skips the batches - void skip(int skipCount) override { - if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && - mFileBatchPos == mDims.d[0]) { - mFileCount += skipCount * mBatchSize / mDims.d[0]; - return; - } - - int x = mBatchCount; - for (int i = 0; i < skipCount; i++) { - next(); - } - mBatchCount = x; - } - - float* getBatch() override { return mBatch.data(); } - - float* getLabels() override { return mLabels.data(); } - - int getBatchesRead() const override { return mBatchCount; } - - int getBatchSize() const override { return mBatchSize; } - - nvinfer1::Dims getDims() const override { return mDims; } - - private: - float* getFileBatch() { return mFileBatch.data(); } - - float* getFileLabels() { return mFileLabels.data(); } - - bool update() { - if (mListFile.empty()) { - std::string inputFileName = locateFile( - mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir); - FILE* file = fopen(inputFileName.c_str(), "rb"); - if (!file) { - return false; - } - - int d[4]; - size_t readSize = fread(d, sizeof(int), 4, file); - ASSERT(readSize == 4); - ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && - mDims.d[3] == d[3]); - size_t readInputCount = - fread(getFileBatch(), sizeof(float), mDims.d[0] * mImageSize, file); - ASSERT(readInputCount == size_t(mDims.d[0] * mImageSize)); - size_t readLabelCount = - fread(getFileLabels(), sizeof(float), mDims.d[0], file); - ASSERT(readLabelCount == 0 || readLabelCount == size_t(mDims.d[0])); - - fclose(file); - } else { - std::vector fNames; - std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary); - if (!file) { - return false; - } - - sample::gLogInfo << "Batch #" << mFileCount << std::endl; - file.seekg(((mBatchCount * mBatchSize)) * 7); - - for (int i = 1; i <= mBatchSize; i++) { - std::string sName; - std::getline(file, sName); - sName = sName + ".ppm"; - sample::gLogInfo << "Calibrating with file " << sName << std::endl; - fNames.emplace_back(sName); - } - - mFileCount++; - - const int imageC = 3; - const int imageH = 300; - const int imageW = 300; - std::vector> ppms( - fNames.size()); - for (uint32_t i = 0; i < fNames.size(); ++i) { - readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]); - } - - std::vector data(samplesCommon::volume(mDims)); - const float scale = 2.0 / 255.0; - const float bias = 1.0; - long int volChl = mDims.d[2] * mDims.d[3]; - - // Normalize input data - for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; - i < mBatchSize; ++i) { - for (int c = 0; c < mDims.d[1]; ++c) { - for (int j = 0; j < volChl; ++j) { - data[i * volImg + c * volChl + j] = - scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias; - } - } - } - - std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch()); - } - - mFileBatchPos = 0; - return true; - } - - int mBatchSize{0}; - int mMaxBatches{0}; - int mBatchCount{0}; - int mFileCount{0}; - int mFileBatchPos{0}; - int mImageSize{0}; - std::vector mBatch; //!< Data for the batch - std::vector mLabels; //!< Labels for the batch - std::vector mFileBatch; //!< List of image files - std::vector mFileLabels; //!< List of label files - std::string mPrefix; //!< Batch file name prefix - std::string mSuffix; //!< Batch file name suffix - nvinfer1::Dims mDims; //!< Input dimensions - std::string mListFile; //!< File name of the list of image names - std::vector - mDataDir; //!< Directories where the files can be found -}; - -#endif diff --git a/csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg b/csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg deleted file mode 100644 index 51ff339c1..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/CPPLINT.cfg +++ /dev/null @@ -1 +0,0 @@ -exclude_files=.* diff --git a/csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h b/csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h deleted file mode 100644 index 40eb8f13e..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/EntropyCalibrator.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ENTROPY_CALIBRATOR_H -#define ENTROPY_CALIBRATOR_H - -#include "BatchStream.h" -#include "NvInfer.h" - -//! \class EntropyCalibratorImpl -//! -//! \brief Implements common functionality for Entropy calibrators. -//! -template class EntropyCalibratorImpl { - public: - EntropyCalibratorImpl(TBatchStream stream, int firstBatch, - std::string networkName, const char* inputBlobName, - bool readCache = true) - : mStream{stream}, - mCalibrationTableName("CalibrationTable" + networkName), - mInputBlobName(inputBlobName), mReadCache(readCache) { - nvinfer1::Dims dims = mStream.getDims(); - mInputCount = samplesCommon::volume(dims); - CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); - mStream.reset(firstBatch); - } - - virtual ~EntropyCalibratorImpl() { CHECK(cudaFree(mDeviceInput)); } - - int getBatchSize() const noexcept { return mStream.getBatchSize(); } - - bool getBatch(void* bindings[], const char* names[], - int nbBindings) noexcept { - if (!mStream.next()) { - return false; - } - CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), - mInputCount * sizeof(float), cudaMemcpyHostToDevice)); - ASSERT(!strcmp(names[0], mInputBlobName)); - bindings[0] = mDeviceInput; - return true; - } - - const void* readCalibrationCache(size_t& length) noexcept { - mCalibrationCache.clear(); - std::ifstream input(mCalibrationTableName, std::ios::binary); - input >> std::noskipws; - if (mReadCache && input.good()) { - std::copy(std::istream_iterator(input), - std::istream_iterator(), - std::back_inserter(mCalibrationCache)); - } - length = mCalibrationCache.size(); - return length ? mCalibrationCache.data() : nullptr; - } - - void writeCalibrationCache(const void* cache, size_t length) noexcept { - std::ofstream output(mCalibrationTableName, std::ios::binary); - output.write(reinterpret_cast(cache), length); - } - - private: - TBatchStream mStream; - size_t mInputCount; - std::string mCalibrationTableName; - const char* mInputBlobName; - bool mReadCache{true}; - void* mDeviceInput{nullptr}; - std::vector mCalibrationCache; -}; - -//! \class Int8EntropyCalibrator2 -//! -//! \brief Implements Entropy calibrator 2. -//! CalibrationAlgoType is kENTROPY_CALIBRATION_2. -//! -template -class Int8EntropyCalibrator2 : public IInt8EntropyCalibrator2 { - public: - Int8EntropyCalibrator2(TBatchStream stream, int firstBatch, - const char* networkName, const char* inputBlobName, - bool readCache = true) - : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) {} - - int getBatchSize() const noexcept override { return mImpl.getBatchSize(); } - - bool getBatch(void* bindings[], const char* names[], - int nbBindings) noexcept override { - return mImpl.getBatch(bindings, names, nbBindings); - } - - const void* readCalibrationCache(size_t& length) noexcept override { - return mImpl.readCalibrationCache(length); - } - - void writeCalibrationCache(const void* cache, - size_t length) noexcept override { - mImpl.writeCalibrationCache(cache, length); - } - - private: - EntropyCalibratorImpl mImpl; -}; - -#endif // ENTROPY_CALIBRATOR_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h b/csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h deleted file mode 100644 index e13b55bd9..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/ErrorRecorder.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ERROR_RECORDER_H -#define ERROR_RECORDER_H -#include "NvInferRuntimeCommon.h" -#include "logger.h" -#include -#include -#include -#include -#include - -using nvinfer1::ErrorCode; -using nvinfer1::IErrorRecorder; - -//! -//! A simple implementation of the IErrorRecorder interface for -//! use by samples. This interface also can be used as a reference -//! implementation. -//! The sample Error recorder is based on a vector that pairs the error -//! code and the error string into a single element. It also uses -//! standard mutex's and atomics in order to make sure that the code -//! works in a multi-threaded environment. -//! -class SampleErrorRecorder : public IErrorRecorder { - using errorPair = std::pair; - using errorStack = std::vector; - - public: - SampleErrorRecorder() = default; - - virtual ~SampleErrorRecorder() noexcept {} - int32_t getNbErrors() const noexcept final { return mErrorStack.size(); } - ErrorCode getErrorCode(int32_t errorIdx) const noexcept final { - return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT - : (*this)[errorIdx].first; - }; - IErrorRecorder::ErrorDesc - getErrorDesc(int32_t errorIdx) const noexcept final { - return invalidIndexCheck(errorIdx) ? "errorIdx out of range." - : (*this)[errorIdx].second.c_str(); - } - // This class can never overflow since we have dynamic resize via std::vector - // usage. - bool hasOverflowed() const noexcept final { return false; } - - // Empty the errorStack. - void clear() noexcept final { - try { - // grab a lock so that there is no addition while clearing. - std::lock_guard guard(mStackLock); - mErrorStack.clear(); - } catch (const std::exception& e) { - sample::gLogFatal << "Internal Error: " << e.what() << std::endl; - } - }; - - //! Simple helper function that - bool empty() const noexcept { return mErrorStack.empty(); } - - bool reportError(ErrorCode val, - IErrorRecorder::ErrorDesc desc) noexcept final { - try { - std::lock_guard guard(mStackLock); - sample::gLogError << "Error[" << static_cast(val) - << "]: " << desc << std::endl; - mErrorStack.push_back(errorPair(val, desc)); - } catch (const std::exception& e) { - sample::gLogFatal << "Internal Error: " << e.what() << std::endl; - } - // All errors are considered fatal. - return true; - } - - // Atomically increment or decrement the ref counter. - IErrorRecorder::RefCount incRefCount() noexcept final { return ++mRefCount; } - IErrorRecorder::RefCount decRefCount() noexcept final { return --mRefCount; } - - private: - // Simple helper functions. - const errorPair& operator[](size_t index) const noexcept { - return mErrorStack[index]; - } - - bool invalidIndexCheck(int32_t index) const noexcept { - // By converting signed to unsigned, we only need a single check since - // negative numbers turn into large positive greater than the size. - size_t sIndex = index; - return sIndex >= mErrorStack.size(); - } - // Mutex to hold when locking mErrorStack. - std::mutex mStackLock; - - // Reference count of the class. Destruction of the class when mRefCount - // is not zero causes undefined behavior. - std::atomic mRefCount{0}; - - // The error stack that holds the errors recorded by TensorRT. - errorStack mErrorStack; -}; // class SampleErrorRecorder -#endif // ERROR_RECORDER_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/README.md b/csrcs/fastdeploy/backends/tensorrt/common/README.md deleted file mode 100644 index 0ed86b17a..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/README.md +++ /dev/null @@ -1 +0,0 @@ -目录代码来源自 https://github.com/NVIDIA/TensorRT diff --git a/csrcs/fastdeploy/backends/tensorrt/common/argsParser.h b/csrcs/fastdeploy/backends/tensorrt/common/argsParser.h deleted file mode 100644 index e2e1b1e95..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/argsParser.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef TENSORRT_ARGS_PARSER_H -#define TENSORRT_ARGS_PARSER_H - -#include -#include -#ifdef _MSC_VER -#include ".\windows\getopt.h" -#else -#include -#endif -#include - -namespace samplesCommon { - -//! -//! \brief The SampleParams structure groups the basic parameters required by -//! all sample networks. -//! -struct SampleParams { - int32_t batchSize{1}; //!< Number of inputs in a batch - int32_t dlaCore{-1}; //!< Specify the DLA core to run network on. - bool int8{false}; //!< Allow runnning the network in Int8 mode. - bool fp16{false}; //!< Allow running the network in FP16 mode. - std::vector - dataDirs; //!< Directory paths where sample data files are stored - std::vector inputTensorNames; - std::vector outputTensorNames; -}; - -//! -//! \brief The CaffeSampleParams structure groups the additional parameters -//! required by -//! networks that use caffe -//! -struct CaffeSampleParams : public SampleParams { - std::string - prototxtFileName; //!< Filename of prototxt design file of a network - std::string - weightsFileName; //!< Filename of trained weights file of a network - std::string meanFileName; //!< Filename of mean file of a network -}; - -//! -//! \brief The OnnxSampleParams structure groups the additional parameters -//! required by -//! networks that use ONNX -//! -struct OnnxSampleParams : public SampleParams { - std::string onnxFileName; //!< Filename of ONNX file of a network -}; - -//! -//! \brief The UffSampleParams structure groups the additional parameters -//! required by -//! networks that use Uff -//! -struct UffSampleParams : public SampleParams { - std::string uffFileName; //!< Filename of uff file of a network -}; - -//! -//! /brief Struct to maintain command-line arguments. -//! -struct Args { - bool runInInt8{false}; - bool runInFp16{false}; - bool help{false}; - int32_t useDLACore{-1}; - int32_t batch{1}; - std::vector dataDirs; - std::string saveEngine; - std::string loadEngine; - bool useILoop{false}; -}; - -//! -//! \brief Populates the Args struct with the provided command-line parameters. -//! -//! \throw invalid_argument if any of the arguments are not valid -//! -//! \return boolean If return value is true, execution can continue, otherwise -//! program should exit -//! -inline bool parseArgs(Args& args, int32_t argc, char* argv[]) { - while (1) { - int32_t arg; - static struct option long_options[] = { - {"help", no_argument, 0, 'h'}, - {"datadir", required_argument, 0, 'd'}, - {"int8", no_argument, 0, 'i'}, - {"fp16", no_argument, 0, 'f'}, - {"useILoop", no_argument, 0, 'l'}, - {"saveEngine", required_argument, 0, 's'}, - {"loadEngine", no_argument, 0, 'o'}, - {"useDLACore", required_argument, 0, 'u'}, - {"batch", required_argument, 0, 'b'}, - {nullptr, 0, nullptr, 0}}; - int32_t option_index = 0; - arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index); - if (arg == -1) { - break; - } - - switch (arg) { - case 'h': - args.help = true; - return true; - case 'd': - if (optarg) { - args.dataDirs.push_back(optarg); - } else { - std::cerr << "ERROR: --datadir requires option argument" << std::endl; - return false; - } - break; - case 's': - if (optarg) { - args.saveEngine = optarg; - } - break; - case 'o': - if (optarg) { - args.loadEngine = optarg; - } - break; - case 'i': - args.runInInt8 = true; - break; - case 'f': - args.runInFp16 = true; - break; - case 'l': - args.useILoop = true; - break; - case 'u': - if (optarg) { - args.useDLACore = std::stoi(optarg); - } - break; - case 'b': - if (optarg) { - args.batch = std::stoi(optarg); - } - break; - default: - return false; - } - } - return true; -} - -} // namespace samplesCommon - -#endif // TENSORRT_ARGS_PARSER_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/buffers.h b/csrcs/fastdeploy/backends/tensorrt/common/buffers.h deleted file mode 100644 index 8061ee33d..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/buffers.h +++ /dev/null @@ -1,426 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef TENSORRT_BUFFERS_H -#define TENSORRT_BUFFERS_H - -#include "NvInfer.h" -#include "common.h" -#include "half.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace samplesCommon { - -//! -//! \brief The GenericBuffer class is a templated class for buffers. -//! -//! \details This templated RAII (Resource Acquisition Is Initialization) class -//! handles the allocation, -//! deallocation, querying of buffers on both the device and the host. -//! It can handle data of arbitrary types because it stores byte -//! buffers. -//! The template parameters AllocFunc and FreeFunc are used for the -//! allocation and deallocation of the buffer. -//! AllocFunc must be a functor that takes in (void** ptr, size_t size) -//! and returns bool. ptr is a pointer to where the allocated buffer -//! address should be stored. -//! size is the amount of memory in bytes to allocate. -//! The boolean indicates whether or not the memory allocation was -//! successful. -//! FreeFunc must be a functor that takes in (void* ptr) and returns -//! void. -//! ptr is the allocated buffer address. It must work with nullptr -//! input. -//! -template class GenericBuffer { - public: - //! - //! \brief Construct an empty buffer. - //! - GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT) - : mSize(0), mCapacity(0), mType(type), mBuffer(nullptr) {} - - //! - //! \brief Construct a buffer with the specified allocation size in bytes. - //! - GenericBuffer(size_t size, nvinfer1::DataType type) - : mSize(size), mCapacity(size), mType(type) { - if (!allocFn(&mBuffer, this->nbBytes())) { - throw std::bad_alloc(); - } - } - - GenericBuffer(GenericBuffer&& buf) - : mSize(buf.mSize), mCapacity(buf.mCapacity), mType(buf.mType), - mBuffer(buf.mBuffer) { - buf.mSize = 0; - buf.mCapacity = 0; - buf.mType = nvinfer1::DataType::kFLOAT; - buf.mBuffer = nullptr; - } - - GenericBuffer& operator=(GenericBuffer&& buf) { - if (this != &buf) { - freeFn(mBuffer); - mSize = buf.mSize; - mCapacity = buf.mCapacity; - mType = buf.mType; - mBuffer = buf.mBuffer; - // Reset buf. - buf.mSize = 0; - buf.mCapacity = 0; - buf.mBuffer = nullptr; - } - return *this; - } - - //! - //! \brief Returns pointer to underlying array. - //! - void* data() { return mBuffer; } - - //! - //! \brief Returns pointer to underlying array. - //! - const void* data() const { return mBuffer; } - - //! - //! \brief Returns the size (in number of elements) of the buffer. - //! - size_t size() const { return mSize; } - - //! - //! \brief Returns the size (in bytes) of the buffer. - //! - size_t nbBytes() const { - return this->size() * samplesCommon::getElementSize(mType); - } - - //! - //! \brief Resizes the buffer. This is a no-op if the new size is smaller than - //! or equal to the current capacity. - //! - void resize(size_t newSize) { - mSize = newSize; - if (mCapacity < newSize) { - freeFn(mBuffer); - if (!allocFn(&mBuffer, this->nbBytes())) { - throw std::bad_alloc{}; - } - mCapacity = newSize; - } - } - - //! - //! \brief Overload of resize that accepts Dims - //! - void resize(const nvinfer1::Dims& dims) { - return this->resize(samplesCommon::volume(dims)); - } - - ~GenericBuffer() { freeFn(mBuffer); } - - private: - size_t mSize{0}, mCapacity{0}; - nvinfer1::DataType mType; - void* mBuffer; - AllocFunc allocFn; - FreeFunc freeFn; -}; - -class DeviceAllocator { - public: - bool operator()(void** ptr, size_t size) const { - return cudaMalloc(ptr, size) == cudaSuccess; - } -}; - -class DeviceFree { - public: - void operator()(void* ptr) const { cudaFree(ptr); } -}; - -class HostAllocator { - public: - bool operator()(void** ptr, size_t size) const { - *ptr = malloc(size); - return *ptr != nullptr; - } -}; - -class HostFree { - public: - void operator()(void* ptr) const { free(ptr); } -}; - -using DeviceBuffer = GenericBuffer; -using HostBuffer = GenericBuffer; - -//! -//! \brief The ManagedBuffer class groups together a pair of corresponding -//! device and host buffers. -//! -class ManagedBuffer { - public: - DeviceBuffer deviceBuffer; - HostBuffer hostBuffer; -}; - -//! -//! \brief The BufferManager class handles host and device buffer allocation -//! and deallocation. -//! -//! \details This RAII class handles host and device buffer allocation and -//! deallocation, -//! memcpy between host and device buffers to aid with inference, -//! and debugging dumps to validate inference. The BufferManager class -//! is meant to be -//! used to simplify buffer management and any interactions between -//! buffers and the engine. -//! -class BufferManager { - public: - static const size_t kINVALID_SIZE_VALUE = ~size_t(0); - - //! - //! \brief Create a BufferManager for handling buffer interactions with - //! engine. - //! - BufferManager(std::shared_ptr engine, - const int batchSize = 0, - const nvinfer1::IExecutionContext* context = nullptr) - : mEngine(engine), mBatchSize(batchSize) { - // Full Dims implies no batch size. - assert(engine->hasImplicitBatchDimension() || mBatchSize == 0); - // Create host and device buffers - for (int i = 0; i < mEngine->getNbBindings(); i++) { - auto dims = context ? context->getBindingDimensions(i) - : mEngine->getBindingDimensions(i); - size_t vol = context || !mBatchSize ? 1 : static_cast(mBatchSize); - nvinfer1::DataType type = mEngine->getBindingDataType(i); - int vecDim = mEngine->getBindingVectorizedDim(i); - if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector - { - int scalarsPerVec = mEngine->getBindingComponentsPerElement(i); - dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec); - vol *= scalarsPerVec; - } - vol *= samplesCommon::volume(dims); - std::unique_ptr manBuf{new ManagedBuffer()}; - manBuf->deviceBuffer = DeviceBuffer(vol, type); - manBuf->hostBuffer = HostBuffer(vol, type); - mDeviceBindings.emplace_back(manBuf->deviceBuffer.data()); - mManagedBuffers.emplace_back(std::move(manBuf)); - } - } - - //! - //! \brief Returns a vector of device buffers that you can use directly as - //! bindings for the execute and enqueue methods of IExecutionContext. - //! - std::vector& getDeviceBindings() { return mDeviceBindings; } - - //! - //! \brief Returns a vector of device buffers. - //! - const std::vector& getDeviceBindings() const { - return mDeviceBindings; - } - - //! - //! \brief Returns the device buffer corresponding to tensorName. - //! Returns nullptr if no such tensor can be found. - //! - void* getDeviceBuffer(const std::string& tensorName) const { - return getBuffer(false, tensorName); - } - - //! - //! \brief Returns the host buffer corresponding to tensorName. - //! Returns nullptr if no such tensor can be found. - //! - void* getHostBuffer(const std::string& tensorName) const { - return getBuffer(true, tensorName); - } - - //! - //! \brief Returns the size of the host and device buffers that correspond to - //! tensorName. - //! Returns kINVALID_SIZE_VALUE if no such tensor can be found. - //! - size_t size(const std::string& tensorName) const { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) - return kINVALID_SIZE_VALUE; - return mManagedBuffers[index]->hostBuffer.nbBytes(); - } - - //! - //! \brief Dump host buffer with specified tensorName to ostream. - //! Prints error message to std::ostream if no such tensor can be - //! found. - //! - void dumpBuffer(std::ostream& os, const std::string& tensorName) { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) { - os << "Invalid tensor name" << std::endl; - return; - } - void* buf = mManagedBuffers[index]->hostBuffer.data(); - size_t bufSize = mManagedBuffers[index]->hostBuffer.nbBytes(); - nvinfer1::Dims bufDims = mEngine->getBindingDimensions(index); - size_t rowCount = static_cast( - bufDims.nbDims > 0 ? bufDims.d[bufDims.nbDims - 1] : mBatchSize); - int leadDim = mBatchSize; - int* trailDims = bufDims.d; - int nbDims = bufDims.nbDims; - - // Fix explicit Dimension networks - if (!leadDim && nbDims > 0) { - leadDim = bufDims.d[0]; - ++trailDims; - --nbDims; - } - - os << "[" << leadDim; - for (int i = 0; i < nbDims; i++) - os << ", " << trailDims[i]; - os << "]" << std::endl; - switch (mEngine->getBindingDataType(index)) { - case nvinfer1::DataType::kINT32: - print(os, buf, bufSize, rowCount); - break; - case nvinfer1::DataType::kFLOAT: - print(os, buf, bufSize, rowCount); - break; - case nvinfer1::DataType::kHALF: - print(os, buf, bufSize, rowCount); - break; - case nvinfer1::DataType::kINT8: - assert(0 && "Int8 network-level input and output is not supported"); - break; - case nvinfer1::DataType::kBOOL: - assert(0 && "Bool network-level input and output are not supported"); - break; - } - } - - //! - //! \brief Templated print function that dumps buffers of arbitrary type to - //! std::ostream. - //! rowCount parameter controls how many elements are on each line. - //! A rowCount of 1 means that there is only 1 element on each line. - //! - template - void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) { - assert(rowCount != 0); - assert(bufSize % sizeof(T) == 0); - T* typedBuf = static_cast(buf); - size_t numItems = bufSize / sizeof(T); - for (int i = 0; i < static_cast(numItems); i++) { - // Handle rowCount == 1 case - if (rowCount == 1 && i != static_cast(numItems) - 1) - os << typedBuf[i] << std::endl; - else if (rowCount == 1) - os << typedBuf[i]; - // Handle rowCount > 1 case - else if (i % rowCount == 0) - os << typedBuf[i]; - else if (i % rowCount == rowCount - 1) - os << " " << typedBuf[i] << std::endl; - else - os << " " << typedBuf[i]; - } - } - - //! - //! \brief Copy the contents of input host buffers to input device buffers - //! synchronously. - //! - void copyInputToDevice() { memcpyBuffers(true, false, false); } - - //! - //! \brief Copy the contents of output device buffers to output host buffers - //! synchronously. - //! - void copyOutputToHost() { memcpyBuffers(false, true, false); } - - //! - //! \brief Copy the contents of input host buffers to input device buffers - //! asynchronously. - //! - void copyInputToDeviceAsync(const cudaStream_t& stream = 0) { - memcpyBuffers(true, false, true, stream); - } - - //! - //! \brief Copy the contents of output device buffers to output host buffers - //! asynchronously. - //! - void copyOutputToHostAsync(const cudaStream_t& stream = 0) { - memcpyBuffers(false, true, true, stream); - } - - ~BufferManager() = default; - - private: - void* getBuffer(const bool isHost, const std::string& tensorName) const { - int index = mEngine->getBindingIndex(tensorName.c_str()); - if (index == -1) - return nullptr; - return (isHost ? mManagedBuffers[index]->hostBuffer.data() - : mManagedBuffers[index]->deviceBuffer.data()); - } - - void memcpyBuffers(const bool copyInput, const bool deviceToHost, - const bool async, const cudaStream_t& stream = 0) { - for (int i = 0; i < mEngine->getNbBindings(); i++) { - void* dstPtr = deviceToHost ? mManagedBuffers[i]->hostBuffer.data() - : mManagedBuffers[i]->deviceBuffer.data(); - const void* srcPtr = deviceToHost - ? mManagedBuffers[i]->deviceBuffer.data() - : mManagedBuffers[i]->hostBuffer.data(); - const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes(); - const cudaMemcpyKind memcpyType = - deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice; - if ((copyInput && mEngine->bindingIsInput(i)) || - (!copyInput && !mEngine->bindingIsInput(i))) { - if (async) - CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream)); - else - CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType)); - } - } - } - - std::shared_ptr mEngine; //!< The pointer to the engine - int mBatchSize; //!< The batch size for legacy networks, 0 otherwise. - std::vector> - mManagedBuffers; //!< The vector of pointers to managed buffers - std::vector mDeviceBindings; //!< The vector of device buffers needed - //! for engine execution -}; - -} // namespace samplesCommon - -#endif // TENSORRT_BUFFERS_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/common.h b/csrcs/fastdeploy/backends/tensorrt/common/common.h deleted file mode 100644 index ad3af72a2..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/common.h +++ /dev/null @@ -1,844 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TENSORRT_COMMON_H -#define TENSORRT_COMMON_H - -// For loadLibrary -#ifdef _MSC_VER -// Needed so that the max/min definitions in windows.h do not conflict with -// std::max/min. -#define NOMINMAX -#include -#undef NOMINMAX -#else -#include -#endif - -#include "NvInfer.h" -#include "NvInferPlugin.h" -#include "logger.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "safeCommon.h" - -using namespace nvinfer1; -using namespace plugin; - -#ifdef _MSC_VER -#define FN_NAME __FUNCTION__ -#else -#define FN_NAME __func__ -#endif - -#if defined(__aarch64__) || defined(__QNX__) -#define ENABLE_DLA_API 1 -#endif - -#define CHECK_RETURN_W_MSG(status, val, errMsg) \ - do { \ - if (!(status)) { \ - sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " \ - << FN_NAME << "(), line " << __LINE__ << std::endl; \ - return val; \ - } \ - } while (0) - -#undef ASSERT -#define ASSERT(condition) \ - do { \ - if (!(condition)) { \ - sample::gLogError << "Assertion failure: " << #condition << std::endl; \ - abort(); \ - } \ - } while (0) - -#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "") - -#define OBJ_GUARD(A) std::unique_ptr - -template OBJ_GUARD(T) makeObjGuard(T_* t) { - CHECK(!(std::is_base_of::value || std::is_same::value)); - auto deleter = [](T* t) { t->destroy(); }; - return std::unique_ptr{static_cast(t), deleter}; -} - -constexpr long double operator"" _GiB(long double val) { - return val * (1 << 30); -} -constexpr long double operator"" _MiB(long double val) { - return val * (1 << 20); -} -constexpr long double operator"" _KiB(long double val) { - return val * (1 << 10); -} - -// These is necessary if we want to be able to write 1_GiB instead of 1.0_GiB. -// Since the return type is signed, -1_GiB will work as expected. -constexpr long long int operator"" _GiB(unsigned long long val) { - return val * (1 << 30); -} -constexpr long long int operator"" _MiB(unsigned long long val) { - return val * (1 << 20); -} -constexpr long long int operator"" _KiB(unsigned long long val) { - return val * (1 << 10); -} - -struct SimpleProfiler : public nvinfer1::IProfiler { - struct Record { - float time{0}; - int count{0}; - }; - - virtual void reportLayerTime(const char* layerName, float ms) noexcept { - mProfile[layerName].count++; - mProfile[layerName].time += ms; - if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == - mLayerNames.end()) { - mLayerNames.push_back(layerName); - } - } - - SimpleProfiler(const char* name, - const std::vector& srcProfilers = - std::vector()) - : mName(name) { - for (const auto& srcProfiler : srcProfilers) { - for (const auto& rec : srcProfiler.mProfile) { - auto it = mProfile.find(rec.first); - if (it == mProfile.end()) { - mProfile.insert(rec); - } else { - it->second.time += rec.second.time; - it->second.count += rec.second.count; - } - } - } - } - - friend std::ostream& operator<<(std::ostream& out, - const SimpleProfiler& value) { - out << "========== " << value.mName << " profile ==========" << std::endl; - float totalTime = 0; - std::string layerNameStr = "TensorRT layer name"; - int maxLayerNameLength = - std::max(static_cast(layerNameStr.size()), 70); - for (const auto& elem : value.mProfile) { - totalTime += elem.second.time; - maxLayerNameLength = - std::max(maxLayerNameLength, static_cast(elem.first.size())); - } - - auto old_settings = out.flags(); - auto old_precision = out.precision(); - // Output header - { - out << std::setw(maxLayerNameLength) << layerNameStr << " "; - out << std::setw(12) << "Runtime, " - << "%" - << " "; - out << std::setw(12) << "Invocations" - << " "; - out << std::setw(12) << "Runtime, ms" << std::endl; - } - for (size_t i = 0; i < value.mLayerNames.size(); i++) { - const std::string layerName = value.mLayerNames[i]; - auto elem = value.mProfile.at(layerName); - out << std::setw(maxLayerNameLength) << layerName << " "; - out << std::setw(12) << std::fixed << std::setprecision(1) - << (elem.time * 100.0F / totalTime) << "%" - << " "; - out << std::setw(12) << elem.count << " "; - out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time - << std::endl; - } - out.flags(old_settings); - out.precision(old_precision); - out << "========== " << value.mName << " total runtime = " << totalTime - << " ms ==========" << std::endl; - - return out; - } - - private: - std::string mName; - std::vector mLayerNames; - std::map mProfile; -}; - -//! Locate path to file, given its filename or filepath suffix and possible dirs -//! it might lie in. -//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a -//! file path. -inline std::string locateFile(const std::string& filepathSuffix, - const std::vector& directories, - bool reportError = true) { - const int MAX_DEPTH{10}; - bool found{false}; - std::string filepath; - - for (auto& dir : directories) { - if (!dir.empty() && dir.back() != '/') { -#ifdef _MSC_VER - filepath = dir + "\\" + filepathSuffix; -#else - filepath = dir + "/" + filepathSuffix; -#endif - } else { - filepath = dir + filepathSuffix; - } - - for (int i = 0; i < MAX_DEPTH && !found; i++) { - const std::ifstream checkFile(filepath); - found = checkFile.is_open(); - if (found) { - break; - } - - filepath = "../" + filepath; // Try again in parent dir - } - - if (found) { - break; - } - - filepath.clear(); - } - - // Could not find the file - if (filepath.empty()) { - const std::string dirList = std::accumulate( - directories.begin() + 1, directories.end(), directories.front(), - [](const std::string& a, const std::string& b) { - return a + "\n\t" + b; - }); - std::cout << "Could not find " << filepathSuffix - << " in data directories:\n\t" << dirList << std::endl; - - if (reportError) { - std::cout << "&&&& FAILED" << std::endl; - exit(EXIT_FAILURE); - } - } - - return filepath; -} - -inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int inH, - int inW) { - std::ifstream infile(fileName, std::ifstream::binary); - assert(infile.is_open() && - "Attempting to read from a file that is not open."); - std::string magic, h, w, max; - infile >> magic >> h >> w >> max; - infile.seekg(1, infile.cur); - infile.read(reinterpret_cast(buffer), inH * inW); -} - -namespace samplesCommon { - -// Swaps endianness of an integral type. -template ::value, int>::type = 0> -inline T swapEndianness(const T& value) { - uint8_t bytes[sizeof(T)]; - for (int i = 0; i < static_cast(sizeof(T)); ++i) { - bytes[sizeof(T) - 1 - i] = *(reinterpret_cast(&value) + i); - } - return *reinterpret_cast(bytes); -} - -class HostMemory { - public: - HostMemory() = delete; - virtual void* data() const noexcept { return mData; } - virtual std::size_t size() const noexcept { return mSize; } - virtual DataType type() const noexcept { return mType; } - virtual ~HostMemory() {} - - protected: - HostMemory(std::size_t size, DataType type) - : mData{nullptr}, mSize(size), mType(type) {} - void* mData; - std::size_t mSize; - DataType mType; -}; - -template -class TypedHostMemory : public HostMemory { - public: - explicit TypedHostMemory(std::size_t size) : HostMemory(size, dataType) { - mData = new ElemType[size]; - }; - ~TypedHostMemory() noexcept { delete[](ElemType*) mData; } - ElemType* raw() noexcept { return static_cast(data()); } -}; - -using FloatMemory = TypedHostMemory; -using HalfMemory = TypedHostMemory; -using ByteMemory = TypedHostMemory; - -inline void* safeCudaMalloc(size_t memSize) { - void* deviceMem; - CHECK(cudaMalloc(&deviceMem, memSize)); - if (deviceMem == nullptr) { - std::cerr << "Out of memory" << std::endl; - exit(1); - } - return deviceMem; -} - -inline bool isDebug() { return (std::getenv("TENSORRT_DEBUG") ? true : false); } - -struct InferDeleter { - template void operator()(T* obj) const { delete obj; } -}; - -template using SampleUniquePtr = std::unique_ptr; - -static auto StreamDeleter = [](cudaStream_t* pStream) { - if (pStream) { - cudaStreamDestroy(*pStream); - delete pStream; - } -}; - -inline std::unique_ptr makeCudaStream() { - std::unique_ptr pStream( - new cudaStream_t, StreamDeleter); - if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) != - cudaSuccess) { - pStream.reset(nullptr); - } - - return pStream; -} - -//! Return vector of indices that puts magnitudes of sequence in descending -//! order. -template -std::vector argMagnitudeSort(Iter begin, Iter end) { - std::vector indices(end - begin); - std::iota(indices.begin(), indices.end(), 0); - std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) { - return std::abs(begin[j]) < std::abs(begin[i]); - }); - return indices; -} - -inline bool readReferenceFile(const std::string& fileName, - std::vector& refVector) { - std::ifstream infile(fileName); - if (!infile.is_open()) { - std::cout << "ERROR: readReferenceFile: Attempting to read from a file " - "that is not open." - << std::endl; - return false; - } - std::string line; - while (std::getline(infile, line)) { - if (line.empty()) - continue; - refVector.push_back(line); - } - infile.close(); - return true; -} - -template -std::vector classify(const std::vector& refVector, - const std::vector& output, - const size_t topK) { - const auto inds = - samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); - std::vector result; - result.reserve(topK); - for (size_t k = 0; k < topK; ++k) { - result.push_back(refVector[inds[k]]); - } - return result; -} - -// Returns indices of highest K magnitudes in v. -template -std::vector topKMagnitudes(const std::vector& v, const size_t k) { - std::vector indices = - samplesCommon::argMagnitudeSort(v.cbegin(), v.cend()); - indices.resize(k); - return indices; -} - -template -bool readASCIIFile(const std::string& fileName, const size_t size, - std::vector& out) { - std::ifstream infile(fileName); - if (!infile.is_open()) { - std::cout << "ERROR readASCIIFile: Attempting to read from a file that is " - "not open." - << std::endl; - return false; - } - out.clear(); - out.reserve(size); - out.assign(std::istream_iterator(infile), std::istream_iterator()); - infile.close(); - return true; -} - -template -bool writeASCIIFile(const std::string& fileName, const std::vector& in) { - std::ofstream outfile(fileName); - if (!outfile.is_open()) { - std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is " - "not open." - << std::endl; - return false; - } - for (auto fn : in) { - outfile << fn << "\n"; - } - outfile.close(); - return true; -} - -inline void print_version() { - std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "." - << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << "." - << NV_TENSORRT_BUILD << std::endl; -} - -inline std::string getFileType(const std::string& filepath) { - return filepath.substr(filepath.find_last_of(".") + 1); -} - -inline std::string toLower(const std::string& inp) { - std::string out = inp; - std::transform(out.begin(), out.end(), out.begin(), ::tolower); - return out; -} - -inline float getMaxValue(const float* buffer, int64_t size) { - assert(buffer != nullptr); - assert(size > 0); - return *std::max_element(buffer, buffer + size); -} - -// Ensures that every tensor used by a network has a dynamic range set. -// -// All tensors in a network must have a dynamic range specified if a calibrator -// is not used. -// This function is just a utility to globally fill in missing scales and -// zero-points for the entire network. -// -// If a tensor does not have a dyanamic range set, it is assigned inRange or -// outRange as follows: -// -// * If the tensor is the input to a layer or output of a pooling node, its -// dynamic range is derived from inRange. -// * Otherwise its dynamic range is derived from outRange. -// -// The default parameter values are intended to demonstrate, for final layers in -// the network, -// cases where dynamic ranges are asymmetric. -// -// The default parameter values choosen arbitrarily. Range values should be -// choosen such that -// we avoid underflow or overflow. Also range value should be non zero to avoid -// uniform zero scale tensor. -inline void setAllDynamicRanges(INetworkDefinition* network, - float inRange = 2.0f, float outRange = 4.0f) { - // Ensure that all layer inputs have a scale. - for (int i = 0; i < network->getNbLayers(); i++) { - auto layer = network->getLayer(i); - for (int j = 0; j < layer->getNbInputs(); j++) { - ITensor* input{layer->getInput(j)}; - // Optional inputs are nullptr here and are from RNN layers. - if (input != nullptr && !input->dynamicRangeIsSet()) { - ASSERT(input->setDynamicRange(-inRange, inRange)); - } - } - } - - // Ensure that all layer outputs have a scale. - // Tensors that are also inputs to layers are ingored here - // since the previous loop nest assigned scales to them. - for (int i = 0; i < network->getNbLayers(); i++) { - auto layer = network->getLayer(i); - for (int j = 0; j < layer->getNbOutputs(); j++) { - ITensor* output{layer->getOutput(j)}; - // Optional outputs are nullptr here and are from RNN layers. - if (output != nullptr && !output->dynamicRangeIsSet()) { - // Pooling must have the same input and output scales. - if (layer->getType() == LayerType::kPOOLING) { - ASSERT(output->setDynamicRange(-inRange, inRange)); - } else { - ASSERT(output->setDynamicRange(-outRange, outRange)); - } - } - } - } -} - -inline void setDummyInt8DynamicRanges(const IBuilderConfig* c, - INetworkDefinition* n) { - // Set dummy per-tensor dynamic range if Int8 mode is requested. - if (c->getFlag(BuilderFlag::kINT8)) { - sample::gLogWarning << "Int8 calibrator not provided. Generating dummy " - "per-tensor dynamic range. Int8 accuracy is not " - "guaranteed." - << std::endl; - setAllDynamicRanges(n); - } -} - -inline void enableDLA(IBuilder* builder, IBuilderConfig* config, int useDLACore, - bool allowGPUFallback = true) { - if (useDLACore >= 0) { - if (builder->getNbDLACores() == 0) { - std::cerr << "Trying to use DLA core " << useDLACore - << " on a platform that doesn't have any DLA cores" - << std::endl; - assert( - "Error: use DLA core on a platfrom that doesn't have any DLA cores" && - false); - } - if (allowGPUFallback) { - config->setFlag(BuilderFlag::kGPU_FALLBACK); - } - if (!config->getFlag(BuilderFlag::kINT8)) { - // User has not requested INT8 Mode. - // By default run in FP16 mode. FP32 mode is not permitted. - config->setFlag(BuilderFlag::kFP16); - } - config->setDefaultDeviceType(DeviceType::kDLA); - config->setDLACore(useDLACore); - } -} - -inline int32_t parseDLA(int32_t argc, char** argv) { - for (int32_t i = 1; i < argc; i++) { - if (strncmp(argv[i], "--useDLACore=", 13) == 0) { - return std::stoi(argv[i] + 13); - } - } - return -1; -} - -inline uint32_t getElementSize(nvinfer1::DataType t) noexcept { - switch (t) { - case nvinfer1::DataType::kINT32: - return 4; - case nvinfer1::DataType::kFLOAT: - return 4; - case nvinfer1::DataType::kHALF: - return 2; - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kINT8: - return 1; - } - return 0; -} - -inline int64_t volume(const nvinfer1::Dims& d) { - return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); -} - -template struct PPM { - std::string magic, fileName; - int h, w, max; - uint8_t buffer[C * H * W]; -}; - -// New vPPM(variable sized PPM) class with variable dimensions. -struct vPPM { - std::string magic, fileName; - int h, w, max; - std::vector buffer; -}; - -struct BBox { - float x1, y1, x2, y2; -}; - -template -void readPPMFile(const std::string& filename, - samplesCommon::PPM& ppm) { - ppm.fileName = filename; - std::ifstream infile(filename, std::ifstream::binary); - assert(infile.is_open() && - "Attempting to read from a file that is not open."); - infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; - infile.seekg(1, infile.cur); - infile.read(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); -} - -inline void readPPMFile(const std::string& filename, vPPM& ppm, - std::vector& input_dir) { - ppm.fileName = filename; - std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary); - infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; - infile.seekg(1, infile.cur); - - for (int i = 0; i < ppm.w * ppm.h * 3; ++i) { - ppm.buffer.push_back(0); - } - - infile.read(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); -} - -template -void writePPMFileWithBBox(const std::string& filename, PPM& ppm, - const BBox& bbox) { - std::ofstream outfile("./" + filename, std::ofstream::binary); - assert(!outfile.fail()); - outfile << "P6" - << "\n" - << ppm.w << " " << ppm.h << "\n" - << ppm.max << "\n"; - - auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; - const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); - const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); - const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); - const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1); - - for (int x = x1; x <= x2; ++x) { - // bbox top border - ppm.buffer[(y1 * ppm.w + x) * 3] = 255; - ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0; - // bbox bottom border - ppm.buffer[(y2 * ppm.w + x) * 3] = 255; - ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0; - } - - for (int y = y1; y <= y2; ++y) { - // bbox left border - ppm.buffer[(y * ppm.w + x1) * 3] = 255; - ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0; - // bbox right border - ppm.buffer[(y * ppm.w + x2) * 3] = 255; - ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0; - } - - outfile.write(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); -} - -inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, - std::vector& dets) { - std::ofstream outfile("./" + filename, std::ofstream::binary); - assert(!outfile.fail()); - outfile << "P6" - << "\n" - << ppm.w << " " << ppm.h << "\n" - << ppm.max << "\n"; - auto round = [](float x) -> int { return int(std::floor(x + 0.5f)); }; - - for (auto bbox : dets) { - for (int x = int(bbox.x1); x < int(bbox.x2); ++x) { - // bbox top border - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; - // bbox bottom border - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; - } - - for (int y = int(bbox.y1); y < int(bbox.y2); ++y) { - // bbox left border - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; - // bbox right border - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; - } - } - - outfile.write(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); -} - -class TimerBase { - public: - virtual void start() {} - virtual void stop() {} - float microseconds() const noexcept { return mMs * 1000.f; } - float milliseconds() const noexcept { return mMs; } - float seconds() const noexcept { return mMs / 1000.f; } - void reset() noexcept { mMs = 0.f; } - - protected: - float mMs{0.0f}; -}; - -class GpuTimer : public TimerBase { - public: - explicit GpuTimer(cudaStream_t stream) : mStream(stream) { - CHECK(cudaEventCreate(&mStart)); - CHECK(cudaEventCreate(&mStop)); - } - ~GpuTimer() { - CHECK(cudaEventDestroy(mStart)); - CHECK(cudaEventDestroy(mStop)); - } - void start() { CHECK(cudaEventRecord(mStart, mStream)); } - void stop() { - CHECK(cudaEventRecord(mStop, mStream)); - float ms{0.0f}; - CHECK(cudaEventSynchronize(mStop)); - CHECK(cudaEventElapsedTime(&ms, mStart, mStop)); - mMs += ms; - } - - private: - cudaEvent_t mStart, mStop; - cudaStream_t mStream; -}; // class GpuTimer - -template class CpuTimer : public TimerBase { - public: - using clock_type = Clock; - - void start() { mStart = Clock::now(); } - void stop() { - mStop = Clock::now(); - mMs += std::chrono::duration{mStop - mStart}.count(); - } - - private: - std::chrono::time_point mStart, mStop; -}; // class CpuTimer - -using PreciseCpuTimer = CpuTimer; - -inline std::vector splitString(std::string str, - char delimiter = ',') { - std::vector splitVect; - std::stringstream ss(str); - std::string substr; - - while (ss.good()) { - getline(ss, substr, delimiter); - splitVect.emplace_back(std::move(substr)); - } - return splitVect; -} - -// Return m rounded up to nearest multiple of n -inline int roundUp(int m, int n) { return ((m + n - 1) / n) * n; } - -inline int getC(const Dims& d) { return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; } - -inline int getH(const Dims& d) { return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; } - -inline int getW(const Dims& d) { return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; } - -inline void loadLibrary(const std::string& path) { -#ifdef _MSC_VER - void* handle = LoadLibrary(path.c_str()); -#else - int32_t flags{RTLD_LAZY}; -#if ENABLE_ASAN - // https://github.com/google/sanitizers/issues/89 - // asan doesn't handle module unloading correctly and there are no plans on - // doing - // so. In order to get proper stack traces, don't delete the shared library on - // close so that asan can resolve the symbols correctly. - flags |= RTLD_NODELETE; -#endif // ENABLE_ASAN - - void* handle = dlopen(path.c_str(), flags); -#endif - if (handle == nullptr) { -#ifdef _MSC_VER - sample::gLogError << "Could not load plugin library: " << path << std::endl; -#else - sample::gLogError << "Could not load plugin library: " << path - << ", due to: " << dlerror() << std::endl; -#endif - } -} - -inline int32_t getSMVersion() { - int32_t deviceIndex = 0; - CHECK(cudaGetDevice(&deviceIndex)); - - int32_t major, minor; - CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, - deviceIndex)); - CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, - deviceIndex)); - - return ((major << 8) | minor); -} - -inline bool isSMSafe() { - const int32_t smVersion = getSMVersion(); - return smVersion == 0x0700 || smVersion == 0x0702 || smVersion == 0x0705 || - smVersion == 0x0800 || smVersion == 0x0806 || smVersion == 0x0807; -} - -inline bool isDataTypeSupported(DataType dataType) { - auto builder = SampleUniquePtr( - nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); - if (!builder) { - return false; - } - - if ((dataType == DataType::kINT8 && !builder->platformHasFastInt8()) || - (dataType == DataType::kHALF && !builder->platformHasFastFp16())) { - return false; - } - - return true; -} - -} // namespace samplesCommon - -inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) { - os << "("; - for (int i = 0; i < dims.nbDims; ++i) { - os << (i ? ", " : "") << dims.d[i]; - } - return os << ")"; -} - -#endif // TENSORRT_COMMON_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp b/csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp deleted file mode 100644 index 84b06581a..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "getOptions.h" -#include "logger.h" - -#include -#include -#include -#include -#include - -namespace nvinfer1 { -namespace utility { - -//! Matching for TRTOptions is defined as follows: -//! -//! If A and B both have longName set, A matches B if and only if A.longName == -//! B.longName and (A.shortName == B.shortName if both have short name set). -//! -//! If A only has shortName set and B only has longName set, then A does not -//! match B. It is assumed that when 2 TRTOptions are compared, one of them is -//! the definition of a TRTOption in the input to getOptions. As such, if the -//! definition only has shortName set, it will never be equal to a TRTOption -//! that does not have shortName set (and same for longName). -//! -//! If A and B both have shortName set but B does not have longName set, A -//! matches B if and only if A.shortName == B.shortName. -//! -//! If A has neither long or short name set, A matches B if and only if B has -//! neither long or short name set. -bool matches(const TRTOption& a, const TRTOption& b) { - if (!a.longName.empty() && !b.longName.empty()) { - if (a.shortName && b.shortName) { - return (a.longName == b.longName) && (a.shortName == b.shortName); - } - return a.longName == b.longName; - } - - // If only one of them is not set, this will return false anyway. - return a.shortName == b.shortName; -} - -//! getTRTOptionIndex returns the index of a TRTOption in a vector of -//! TRTOptions, -1 if not found. -int getTRTOptionIndex(const std::vector& options, - const TRTOption& opt) { - for (size_t i = 0; i < options.size(); ++i) { - if (matches(opt, options[i])) { - return i; - } - } - return -1; -} - -//! validateTRTOption will return a string containing an error message if -//! options -//! contain non-numeric characters, or if there are duplicate option names -//! found. -//! Otherwise, returns the empty string. -std::string validateTRTOption(const std::set& seenShortNames, - const std::set& seenLongNames, - const TRTOption& opt) { - if (opt.shortName != 0) { - if (!std::isalnum(opt.shortName)) { - return "Short name '" + std::to_string(opt.shortName) + - "' is non-alphanumeric"; - } - - if (seenShortNames.find(opt.shortName) != seenShortNames.end()) { - return "Short name '" + std::to_string(opt.shortName) + - "' is a duplicate"; - } - } - - if (!opt.longName.empty()) { - for (const char& c : opt.longName) { - if (!std::isalnum(c) && c != '-' && c != '_') { - return "Long name '" + opt.longName + - "' contains characters that are not '-', '_', or alphanumeric"; - } - } - - if (seenLongNames.find(opt.longName) != seenLongNames.end()) { - return "Long name '" + opt.longName + "' is a duplicate"; - } - } - return ""; -} - -//! validateTRTOptions will return a string containing an error message if any -//! options contain non-numeric characters, or if there are duplicate option -//! names found. Otherwise, returns the empty string. -std::string validateTRTOptions(const std::vector& options) { - std::set seenShortNames; - std::set seenLongNames; - for (size_t i = 0; i < options.size(); ++i) { - const std::string errMsg = - validateTRTOption(seenShortNames, seenLongNames, options[i]); - if (!errMsg.empty()) { - return "Error '" + errMsg + "' at TRTOption " + std::to_string(i); - } - - seenShortNames.insert(options[i].shortName); - seenLongNames.insert(options[i].longName); - } - return ""; -} - -//! parseArgs parses an argument list and returns a TRTParsedArgs with the -//! fields set accordingly. Assumes that options is validated. -//! ErrMsg will be set if: -//! - an argument is null -//! - an argument is empty -//! - an argument does not have option (i.e. "-" and "--") -//! - a short argument has more than 1 character -//! - the last argument in the list requires a value -TRTParsedArgs parseArgs(int argc, const char* const* argv, - const std::vector& options) { - TRTParsedArgs parsedArgs; - parsedArgs.values.resize(options.size()); - - for (int i = 1; i < argc; ++i) // index of current command-line argument - { - if (argv[i] == nullptr) { - return TRTParsedArgs{"Null argument at index " + std::to_string(i)}; - } - - const std::string argStr(argv[i]); - if (argStr.empty()) { - return TRTParsedArgs{"Empty argument at index " + std::to_string(i)}; - } - - // No starting hyphen means it is a positional argument - if (argStr[0] != '-') { - parsedArgs.positionalArgs.push_back(argStr); - continue; - } - - if (argStr == "-" || argStr == "--") { - return TRTParsedArgs{"Argument does not specify an option at index " + - std::to_string(i)}; - } - - // If only 1 hyphen, char after is the flag. - TRTOption opt{' ', "", false, ""}; - std::string value; - if (argStr[1] != '-') { - // Must only have 1 char after the hyphen - if (argStr.size() > 2) { - return TRTParsedArgs{ - "Short arg contains more than 1 character at index " + - std::to_string(i)}; - } - opt.shortName = argStr[1]; - } else { - opt.longName = argStr.substr(2); - - // We need to support --foo=bar syntax, so look for '=' - const size_t eqIndex = opt.longName.find('='); - if (eqIndex < opt.longName.size()) { - value = opt.longName.substr(eqIndex + 1); - opt.longName = opt.longName.substr(0, eqIndex); - } - } - - const int idx = getTRTOptionIndex(options, opt); - if (idx < 0) { - continue; - } - - if (options[idx].valueRequired) { - if (!value.empty()) { - parsedArgs.values[idx].second.push_back(value); - parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); - continue; - } - - if (i + 1 >= argc) { - return TRTParsedArgs{"Last argument requires value, but none given"}; - } - - const std::string nextArg(argv[i + 1]); - if (nextArg.size() >= 1 && nextArg[0] == '-') { - sample::gLogWarning << "Warning: Using '" << nextArg - << "' as a value for '" << argStr - << "', Should this be its own flag?" << std::endl; - } - - parsedArgs.values[idx].second.push_back(nextArg); - i += 1; // Next argument already consumed - - parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); - } else { - parsedArgs.values[idx].first += 1; - } - } - return parsedArgs; -} - -TRTParsedArgs getOptions(int argc, const char* const* argv, - const std::vector& options) { - const std::string errMsg = validateTRTOptions(options); - if (!errMsg.empty()) { - return TRTParsedArgs{errMsg}; - } - return parseArgs(argc, argv, options); -} -} // namespace utility -} // namespace nvinfer1 diff --git a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.h b/csrcs/fastdeploy/backends/tensorrt/common/getOptions.h deleted file mode 100644 index efe466632..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/getOptions.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_GET_OPTIONS_H -#define TRT_GET_OPTIONS_H - -#include -#include -#include - -namespace nvinfer1 { -namespace utility { - -//! TRTOption defines a command line option. At least 1 of shortName and -//! longName -//! must be defined. -//! If bool initialization is undefined behavior on your system, valueRequired -//! must also be explicitly defined. -//! helpText is optional. -struct TRTOption { - char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b) - std::string longName; //!< Option name in long (double hyphen) form (i.e. - //!--foo, --bar) - bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4, - //!--foo bar) - std::string helpText; //!< Text to show when printing out the command usage -}; - -//! TRTParsedArgs is returned by getOptions after it has parsed a command line -//! argument list (argv). -//! -//! errMsg is a string containing an error message if any errors occurred. If it -//! is empty, no errors occurred. -//! -//! values stores a vector of pairs for each option (ordered by order in the -//! input). Each pair contains an int (the number of occurrences) and a vector -//! of strings (a list of values). The user should know which of these to use, -//! and which options required values. For non-value options, only occurrences -//! is -//! populated. For value-required options, occurrences == # of values. Values do -//! not need to be unique. -//! -//! positionalArgs stores additional arguments that are passed in without an -//! option (these must not start with a hyphen). -struct TRTParsedArgs { - std::string errMsg; - std::vector>> values; - std::vector positionalArgs; -}; - -//! Parse the input arguments passed to main() and extract options as well as -//! positional arguments. -//! -//! Options are supposed to be passed to main() with a preceding hyphen '-'. -//! -//! If there is a single preceding hyphen, there should be exactly 1 character -//! after the hyphen, which is interpreted as the option. -//! -//! If there are 2 preceding hyphens, the entire argument (without the hyphens) -//! is interpreted as the option. -//! -//! If the option requires a value, the next argument is used as the value. -//! -//! Positional arguments must not start with a hyphen. -//! -//! If an argument requires a value, the next argument is interpreted as the -//! value, even if it is the form of a valid option (i.e. --foo --bar will store -//! "--bar" as a value for option "foo" if "foo" requires a value). -//! We also support --name=value syntax. In this case, 'value' would be used as -//! the value, NOT the next argument. -//! -//! For options: -//! { { 'a', "", false }, -//! { 'b', "", false }, -//! { 0, "cee", false }, -//! { 'd', "", true }, -//! { 'e', "", true }, -//! { 'f', "foo", true } } -//! -//! ./main hello world -a -a --cee -d 12 -f 34 -//! and -//! ./main hello world -a -a --cee -d 12 --foo 34 -//! -//! will result in: -//! -//! TRTParsedArgs { -//! errMsg: "", -//! values: { { 2, {} }, -//! { 0, {} }, -//! { 1, {} }, -//! { 1, {"12"} }, -//! { 0, {} }, -//! { 1, {"34"} } } -//! positionalArgs: {"hello", "world"}, -//! } -//! -//! Non-POSIX behavior: -//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each -//! option must have its own hyphen prefix. -//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be -//! whitespace-separated from the option it is for. -//! -//! @param[in] argc The number of arguments passed to main (including the -//! file name, which is disregarded) -//! @param[in] argv The arguments passed to main (including the file name, -//! which is disregarded) -//! @param[in] options List of TRTOptions to parse -//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of -//! the fields. -TRTParsedArgs getOptions(int argc, const char* const* argv, - const std::vector& options); -} // namespace utility -} // namespace nvinfer1 - -#endif // TRT_GET_OPTIONS_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/half.h b/csrcs/fastdeploy/backends/tensorrt/common/half.h deleted file mode 100644 index 5ca797000..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/half.h +++ /dev/null @@ -1,3787 +0,0 @@ -// half - IEEE 754-based half-precision floating point library. -// -// Copyright (c) 2012-2017 Christian Rau -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated -// documentation files (the "Software"), to deal in the Software without -// restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to -// permit persons to whom the Software is furnished to do so, subject to the -// following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the -// Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR -// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE -// OR OTHER DEALINGS IN THE SOFTWARE. - -/* - * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Version 1.12.0 - -/// \file -/// Main header file for half precision functionality. - -#ifndef HALF_HALF_HPP -#define HALF_HALF_HPP - -/// Combined gcc version number. -#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -// check C++11 language features -#if defined(__clang__) // clang -#if __has_feature(cxx_static_assert) && \ - !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) -#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 -#endif -#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) -#define HALF_ENABLE_CPP11_CONSTEXPR 1 -#endif -#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) -#define HALF_ENABLE_CPP11_NOEXCEPT 1 -#endif -#if __has_feature(cxx_user_literals) && \ - !defined(HALF_ENABLE_CPP11_USER_LITERALS) -#define HALF_ENABLE_CPP11_USER_LITERALS 1 -#endif -#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && \ - !defined(HALF_ENABLE_CPP11_LONG_LONG) -#define HALF_ENABLE_CPP11_LONG_LONG 1 -#endif -/*#elif defined(__INTEL_COMPILER) - //Intel C++ - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - ???????? - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - ???????? - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - ???????? - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - ???????? - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif*/ -#elif defined(__GNUC__) // gcc -#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) -#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 -#endif -#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) -#define HALF_ENABLE_CPP11_CONSTEXPR 1 -#endif -#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) -#define HALF_ENABLE_CPP11_NOEXCEPT 1 -#endif -#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) -#define HALF_ENABLE_CPP11_USER_LITERALS 1 -#endif -#if !defined(HALF_ENABLE_CPP11_LONG_LONG) -#define HALF_ENABLE_CPP11_LONG_LONG 1 -#endif -#endif -#elif defined(_MSC_VER) // Visual C++ -#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) -#define HALF_ENABLE_CPP11_CONSTEXPR 1 -#endif -#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) -#define HALF_ENABLE_CPP11_NOEXCEPT 1 -#endif -#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) -#define HALF_ENABLE_CPP11_USER_LITERALS 1 -#endif -#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) -#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 -#endif -#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) -#define HALF_ENABLE_CPP11_LONG_LONG 1 -#endif -#define HALF_POP_WARNINGS 1 -#pragma warning(push) -#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, -// negative unsigned -#endif - -// check C++11 library features -#include -#if defined(_LIBCPP_VERSION) // libc++ -#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 -#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS -#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 -#endif -#ifndef HALF_ENABLE_CPP11_CSTDINT -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#ifndef HALF_ENABLE_CPP11_CMATH -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#ifndef HALF_ENABLE_CPP11_HASH -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#endif -#elif defined(__GLIBCXX__) // libstdc++ -#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 -#ifdef __clang__ -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) -#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 -#endif -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#else -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#endif -#endif -#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ -#if _CPPLIB_VER >= 520 -#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS -#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 -#endif -#ifndef HALF_ENABLE_CPP11_CSTDINT -#define HALF_ENABLE_CPP11_CSTDINT 1 -#endif -#ifndef HALF_ENABLE_CPP11_HASH -#define HALF_ENABLE_CPP11_HASH 1 -#endif -#endif -#if _CPPLIB_VER >= 610 -#ifndef HALF_ENABLE_CPP11_CMATH -#define HALF_ENABLE_CPP11_CMATH 1 -#endif -#endif -#endif -#undef HALF_GNUC_VERSION - -// support constexpr -#if HALF_ENABLE_CPP11_CONSTEXPR -#define HALF_CONSTEXPR constexpr -#define HALF_CONSTEXPR_CONST constexpr -#else -#define HALF_CONSTEXPR -#define HALF_CONSTEXPR_CONST const -#endif - -// support noexcept -#if HALF_ENABLE_CPP11_NOEXCEPT -#define HALF_NOEXCEPT noexcept -#define HALF_NOTHROW noexcept -#else -#define HALF_NOEXCEPT -#define HALF_NOTHROW throw() -#endif - -#include -#include -#include -#include -#include -#include -#if HALF_ENABLE_CPP11_TYPE_TRAITS -#include -#endif -#if HALF_ENABLE_CPP11_CSTDINT -#include -#endif -#if HALF_ENABLE_CPP11_HASH -#include -#endif - -/// Default rounding mode. -/// This specifies the rounding mode used for all conversions between -/// [half](\ref half_float::half)s and `float`s as -/// well as for the half_cast() if not specifying a rounding mode explicitly. It -/// can be redefined (before including -/// half.hpp) to one of the standard rounding modes using their respective -/// constants or the equivalent values of -/// `std::float_round_style`: -/// -/// `std::float_round_style` | value | rounding -/// ---------------------------------|-------|------------------------- -/// `std::round_indeterminate` | -1 | fastest (default) -/// `std::round_toward_zero` | 0 | toward zero -/// `std::round_to_nearest` | 1 | to nearest -/// `std::round_toward_infinity` | 2 | toward positive infinity -/// `std::round_toward_neg_infinity` | 3 | toward negative infinity -/// -/// By default this is set to `-1` (`std::round_indeterminate`), which uses -/// truncation (round toward zero, but with -/// overflows set to infinity) and is the fastest rounding mode possible. It can -/// even be set to -/// `std::numeric_limits::round_style` to synchronize the rounding mode -/// with that of the underlying -/// single-precision implementation. -#ifndef HALF_ROUND_STYLE -#define HALF_ROUND_STYLE 1 // = std::round_to_nearest -#endif - -/// Tie-breaking behaviour for round to nearest. -/// This specifies if ties in round to nearest should be resolved by rounding to -/// the nearest even value. By default this -/// is defined to `0` resulting in the faster but slightly more biased behaviour -/// of rounding away from zero in half-way -/// cases (and thus equal to the round() function), but can be redefined to `1` -/// (before including half.hpp) if more -/// IEEE-conformant behaviour is needed. -#ifndef HALF_ROUND_TIES_TO_EVEN -#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero -#endif - -/// Value signaling overflow. -/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to -/// a positive value signaling the overflow -/// of an operation, in particular it just evaluates to positive infinity. -#define HUGE_VALH std::numeric_limits::infinity() - -/// Fast half-precision fma function. -/// This symbol is only defined if the fma() function generally executes as fast -/// as, or faster than, a separate -/// half-precision multiplication followed by an addition. Due to the internal -/// single-precision implementation of all -/// arithmetic operations, this is in fact always the case. -#define FP_FAST_FMAH 1 - -#ifndef FP_ILOGB0 -#define FP_ILOGB0 INT_MIN -#endif -#ifndef FP_ILOGBNAN -#define FP_ILOGBNAN INT_MAX -#endif -#ifndef FP_SUBNORMAL -#define FP_SUBNORMAL 0 -#endif -#ifndef FP_ZERO -#define FP_ZERO 1 -#endif -#ifndef FP_NAN -#define FP_NAN 2 -#endif -#ifndef FP_INFINITE -#define FP_INFINITE 3 -#endif -#ifndef FP_NORMAL -#define FP_NORMAL 4 -#endif - -/// Main namespace for half precision functionality. -/// This namespace contains all the functionality provided by the library. -namespace half_float { -class half; - -#if HALF_ENABLE_CPP11_USER_LITERALS -/// Library-defined half-precision literals. -/// Import this namespace to enable half-precision floating point literals: -/// ~~~~{.cpp} -/// using namespace half_float::literal; -/// half_float::half = 4.2_h; -/// ~~~~ -namespace literal { -half operator"" _h(long double); -} -#endif - -/// \internal -/// \brief Implementation details. -namespace detail { -#if HALF_ENABLE_CPP11_TYPE_TRAITS -/// Conditional type. -template -struct conditional : std::conditional {}; - -/// Helper for tag dispatching. -template struct bool_type : std::integral_constant {}; -using std::false_type; -using std::true_type; - -/// Type traits for floating point types. -template struct is_float : std::is_floating_point {}; -#else -/// Conditional type. -template struct conditional { typedef T type; }; -template struct conditional { - typedef F type; -}; - -/// Helper for tag dispatching. -template struct bool_type {}; -typedef bool_type true_type; -typedef bool_type false_type; - -/// Type traits for floating point types. -template struct is_float : false_type {}; -template struct is_float : is_float {}; -template struct is_float : is_float {}; -template struct is_float : is_float {}; -template <> struct is_float : true_type {}; -template <> struct is_float : true_type {}; -template <> struct is_float : true_type {}; -#endif - -/// Type traits for floating point bits. -template struct bits { typedef unsigned char type; }; -template struct bits : bits {}; -template struct bits : bits {}; -template struct bits : bits {}; - -#if HALF_ENABLE_CPP11_CSTDINT -/// Unsigned integer of (at least) 16 bits width. -typedef std::uint_least16_t uint16; - -/// Unsigned integer of (at least) 32 bits width. -template <> struct bits { typedef std::uint_least32_t type; }; - -/// Unsigned integer of (at least) 64 bits width. -template <> struct bits { typedef std::uint_least64_t type; }; -#else -/// Unsigned integer of (at least) 16 bits width. -typedef unsigned short uint16; - -/// Unsigned integer of (at least) 32 bits width. -template <> -struct bits - : conditional::digits >= 32, unsigned int, - unsigned long> {}; - -#if HALF_ENABLE_CPP11_LONG_LONG -/// Unsigned integer of (at least) 64 bits width. -template <> -struct bits - : conditional::digits >= 64, - unsigned long, unsigned long long> {}; -#else -/// Unsigned integer of (at least) 64 bits width. -template <> struct bits { typedef unsigned long type; }; -#endif -#endif - -/// Tag type for binary construction. -struct binary_t {}; - -/// Tag for binary construction. -HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - -/// Temporary half-precision expression. -/// This class represents a half-precision expression which just stores a -/// single-precision value internally. -struct expr { - /// Conversion constructor. - /// \param f single-precision value to convert - explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} - - /// Conversion to single-precision. - /// \return single precision value representing expression value - HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; } - - private: - /// Internal expression value stored in single-precision. - float value_; -}; - -/// SFINAE helper for generic half-precision functions. -/// This class template has to be specialized for each valid combination of -/// argument types to provide a corresponding -/// `type` member equivalent to \a T. -/// \tparam T type to return -template -struct enable {}; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; -template struct enable { typedef T type; }; - -/// Return type for specialized generic 2-argument half-precision functions. -/// This class template has to be specialized for each valid combination of -/// argument types to provide a corresponding -/// `type` member denoting the appropriate return type. -/// \tparam T first argument type -/// \tparam U first argument type -template struct result : enable {}; -template <> struct result { typedef half type; }; - -/// \name Classification helpers -/// \{ - -/// Check for infinity. -/// \tparam T argument type (builtin floating point type) -/// \param arg value to query -/// \retval true if infinity -/// \retval false else -template bool builtin_isinf(T arg) { -#if HALF_ENABLE_CPP11_CMATH - return std::isinf(arg); -#elif defined(_MSC_VER) - return !::_finite(static_cast(arg)) && - !::_isnan(static_cast(arg)); -#else - return arg == std::numeric_limits::infinity() || - arg == -std::numeric_limits::infinity(); -#endif -} - -/// Check for NaN. -/// \tparam T argument type (builtin floating point type) -/// \param arg value to query -/// \retval true if not a number -/// \retval false else -template bool builtin_isnan(T arg) { -#if HALF_ENABLE_CPP11_CMATH - return std::isnan(arg); -#elif defined(_MSC_VER) - return ::_isnan(static_cast(arg)) != 0; -#else - return arg != arg; -#endif -} - -/// Check sign. -/// \tparam T argument type (builtin floating point type) -/// \param arg value to query -/// \retval true if signbit set -/// \retval false else -template bool builtin_signbit(T arg) { -#if HALF_ENABLE_CPP11_CMATH - return std::signbit(arg); -#else - return arg < T() || (arg == T() && T(1) / arg < T()); -#endif -} - -/// \} -/// \name Conversion -/// \{ - -/// Convert IEEE single-precision to half-precision. -/// Credit for this goes to [Jeroen van der -/// Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \param value single-precision value -/// \return binary representation of half-precision value -template -uint16 float2half_impl(float value, true_type) { - typedef bits::type uint32; - uint32 bits; // = *reinterpret_cast(&value); - // //violating - // strict aliasing! - std::memcpy(&bits, &value, sizeof(float)); - /* uint16 hbits = (bits>>16) & 0x8000; - bits &= 0x7FFFFFFF; - int exp = bits >> 23; - if(exp == 255) - return hbits | 0x7C00 | - (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); - if(exp > 142) - { - if(R == std::round_toward_infinity) - return hbits | 0x7C00 - (hbits>>15); - if(R == std::round_toward_neg_infinity) - return hbits | 0x7BFF + (hbits>>15); - return hbits | 0x7BFF + (R!=std::round_toward_zero); - } - int g, s; - if(exp > 112) - { - g = (bits>>12) & 1; - s = (bits&0xFFF) != 0; - hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); - } - else if(exp > 101) - { - int i = 125 - exp; - bits = (bits&0x7FFFFF) | 0x800000; - g = (bits>>i) & 1; - s = (bits&((1L<> (i+1); - } - else - { - g = 0; - s = bits != 0; - } - if(R == std::round_to_nearest) - #if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s|hbits); - #else - hbits += g; - #endif - else if(R == std::round_toward_infinity) - hbits += ~(hbits>>15) & (s|g); - else if(R == std::round_toward_neg_infinity) - hbits += (hbits>>15) & (g|s); - */ - static const uint16 base_table[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, - 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, - 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, - 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, - 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, - 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, - 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, - 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800, - 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, - 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00}; - static const unsigned char shift_table[512] = { - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, - 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 23, - 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 13}; - uint16 hbits = - base_table[bits >> 23] + - static_cast((bits & 0x7FFFFF) >> shift_table[bits >> 23]); - if (R == std::round_to_nearest) - hbits += - (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) | - (((bits >> 23) & 0xFF) == 102)) & - ((hbits & 0x7C00) != 0x7C00) -#if HALF_ROUND_TIES_TO_EVEN - & (((((static_cast(1) << (shift_table[bits >> 23] - 1)) - 1) & - bits) != 0) | - hbits) -#endif - ; - else if (R == std::round_toward_zero) - hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23]; - else if (R == std::round_toward_infinity) - hbits += - ((((bits & 0x7FFFFF & - ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) | - (((bits >> 23) <= 102) & ((bits >> 23) != 0))) & - (hbits < 0x7C00)) - - ((hbits == 0xFC00) & ((bits >> 23) != 511)); - else if (R == std::round_toward_neg_infinity) - hbits += - ((((bits & 0x7FFFFF & - ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) | - (((bits >> 23) <= 358) & ((bits >> 23) != 256))) & - (hbits < 0xFC00) & (hbits >> 15)) - - ((hbits == 0x7C00) & ((bits >> 23) != 255)); - return hbits; -} - -/// Convert IEEE double-precision to half-precision. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \param value double-precision value -/// \return binary representation of half-precision value -template -uint16 float2half_impl(double value, true_type) { - typedef bits::type uint32; - typedef bits::type uint64; - uint64 bits; // = *reinterpret_cast(&value); - // //violating - // strict aliasing! - std::memcpy(&bits, &value, sizeof(double)); - uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; - uint16 hbits = (hi >> 16) & 0x8000; - hi &= 0x7FFFFFFF; - int exp = hi >> 20; - if (exp == 2047) - return hbits | 0x7C00 | - (0x3FF & -static_cast((bits & 0xFFFFFFFFFFFFF) != 0)); - if (exp > 1038) { - if (R == std::round_toward_infinity) - return hbits | 0x7C00 - (hbits >> 15); - if (R == std::round_toward_neg_infinity) - return hbits | 0x7BFF + (hbits >> 15); - return hbits | 0x7BFF + (R != std::round_toward_zero); - } - int g, s = lo != 0; - if (exp > 1008) { - g = (hi >> 9) & 1; - s |= (hi & 0x1FF) != 0; - hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF); - } else if (exp > 997) { - int i = 1018 - exp; - hi = (hi & 0xFFFFF) | 0x100000; - g = (hi >> i) & 1; - s |= (hi & ((1L << i) - 1)) != 0; - hbits |= hi >> (i + 1); - } else { - g = 0; - s |= hi != 0; - } - if (R == std::round_to_nearest) -#if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s | hbits); -#else - hbits += g; -#endif - else if (R == std::round_toward_infinity) - hbits += ~(hbits >> 15) & (s | g); - else if (R == std::round_toward_neg_infinity) - hbits += (hbits >> 15) & (g | s); - return hbits; -} - -/// Convert non-IEEE floating point to half-precision. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam T source type (builtin floating point type) -/// \param value floating point value -/// \return binary representation of half-precision value -template -uint16 float2half_impl(T value, ...) { - uint16 hbits = static_cast(builtin_signbit(value)) << 15; - if (value == T()) - return hbits; - if (builtin_isnan(value)) - return hbits | 0x7FFF; - if (builtin_isinf(value)) - return hbits | 0x7C00; - int exp; - std::frexp(value, &exp); - if (exp > 16) { - if (R == std::round_toward_infinity) - return hbits | (0x7C00 - (hbits >> 15)); - else if (R == std::round_toward_neg_infinity) - return hbits | (0x7BFF + (hbits >> 15)); - return hbits | (0x7BFF + (R != std::round_toward_zero)); - } - if (exp < -13) - value = std::ldexp(value, 24); - else { - value = std::ldexp(value, 11 - exp); - hbits |= ((exp + 13) << 10); - } - T ival, frac = std::modf(value, &ival); - hbits += static_cast(std::abs(static_cast(ival))); - if (R == std::round_to_nearest) { - frac = std::abs(frac); -#if HALF_ROUND_TIES_TO_EVEN - hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits); -#else - hbits += frac >= T(0.5); -#endif - } else if (R == std::round_toward_infinity) - hbits += frac > T(); - else if (R == std::round_toward_neg_infinity) - hbits += frac < T(); - return hbits; -} - -/// Convert floating point to half-precision. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam T source type (builtin floating point type) -/// \param value floating point value -/// \return binary representation of half-precision value -template uint16 float2half(T value) { - return float2half_impl( - value, bool_type < std::numeric_limits::is_iec559 && - sizeof(typename bits::type) == sizeof(T) > ()); -} - -/// Convert integer to half-precision floating point. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam S `true` if value negative, `false` else -/// \tparam T type to convert (builtin integer type) -/// \param value non-negative integral value -/// \return binary representation of half-precision value -template -uint16 int2half_impl(T value) { -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, - "int to half conversion only supports builtin integer types"); -#endif - if (S) - value = -value; - uint16 bits = S << 15; - if (value > 0xFFFF) { - if (R == std::round_toward_infinity) - bits |= 0x7C00 - S; - else if (R == std::round_toward_neg_infinity) - bits |= 0x7BFF + S; - else - bits |= 0x7BFF + (R != std::round_toward_zero); - } else if (value) { - uint32_t m = value, exp = 24; - for (; m < 0x400; m <<= 1, --exp) - ; - for (; m > 0x7FF; m >>= 1, ++exp) - ; - bits |= (exp << 10) + m; - if (exp > 24) { - if (R == std::round_to_nearest) - bits += (value >> (exp - 25)) & 1 -#if HALF_ROUND_TIES_TO_EVEN - & (((((1 << (exp - 25)) - 1) & value) != 0) | bits) -#endif - ; - else if (R == std::round_toward_infinity) - bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S; - else if (R == std::round_toward_neg_infinity) - bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S; - } - } - return bits; -} - -/// Convert integer to half-precision floating point. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam T type to convert (builtin integer type) -/// \param value integral value -/// \return binary representation of half-precision value -template uint16 int2half(T value) { - return (value < 0) ? int2half_impl(value) - : int2half_impl(value); -} - -/// Convert half-precision to IEEE single-precision. -/// Credit for this goes to [Jeroen van der -/// Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). -/// \param value binary representation of half-precision value -/// \return single-precision value -inline float half2float_impl(uint16 value, float, true_type) { - typedef bits::type uint32; - /* uint32 bits = static_cast(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - bits |= 0x38000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,bits-=0x800000) ; - bits += static_cast(abs) << 13; - } - */ - static const uint32 mantissa_table[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, - 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, - 0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000, - 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, - 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, - 0x35F00000, 0x35F80000, 0x36000000, 0x36040000, 0x36080000, 0x360C0000, - 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, - 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, - 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, - 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, - 0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000, - 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, - 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, - 0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, - 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, - 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, - 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, - 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000, - 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, - 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, - 0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000, - 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, - 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, - 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, - 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, 0x37200000, 0x37210000, - 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, - 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, - 0x372E0000, 0x372F0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000, - 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, - 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, - 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, - 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000, - 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, - 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, - 0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000, - 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, - 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, - 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, - 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000, - 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, - 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, - 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, - 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, - 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, - 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, - 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, - 0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000, - 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, - 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, - 0x379F0000, 0x379F8000, 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, - 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, - 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, - 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, - 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, - 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, - 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, - 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, - 0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, - 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, - 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, - 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, - 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000, - 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, - 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, - 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, - 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, - 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, - 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, - 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, - 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, 0x37E00000, 0x37E08000, - 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, - 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, - 0x37E70000, 0x37E78000, 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, - 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, - 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, - 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, - 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, - 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, - 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, - 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, - 0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, - 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, - 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, - 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, - 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000, - 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, - 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, - 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, - 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, - 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, - 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, - 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, - 0x38130000, 0x38134000, 0x38138000, 0x3813C000, 0x38140000, 0x38144000, - 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, - 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, - 0x38178000, 0x3817C000, 0x38180000, 0x38184000, 0x38188000, 0x3818C000, - 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, - 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, - 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, - 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, - 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, - 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, - 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, - 0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, - 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, - 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, - 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, - 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000, - 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, - 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, - 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000, - 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, - 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, - 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, - 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, - 0x38370000, 0x38374000, 0x38378000, 0x3837C000, 0x38380000, 0x38384000, - 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, - 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, - 0x383B8000, 0x383BC000, 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, - 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, - 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, - 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, - 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, - 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, - 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, - 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, - 0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, - 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, - 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, - 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, - 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000, - 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, - 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, - 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000, - 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, - 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, - 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, - 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, - 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, 0x385C0000, 0x385C4000, - 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, - 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, - 0x385F8000, 0x385FC000, 0x38600000, 0x38604000, 0x38608000, 0x3860C000, - 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, - 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, - 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, - 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, - 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, - 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, - 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, - 0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, - 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, - 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, - 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, - 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000, - 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, - 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, - 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000, - 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, - 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, - 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, - 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, - 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, 0x38000000, 0x38002000, - 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, - 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, - 0x3801C000, 0x3801E000, 0x38020000, 0x38022000, 0x38024000, 0x38026000, - 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, - 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, - 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, - 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, - 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, - 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, - 0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, - 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, - 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, - 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, - 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000, - 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, - 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, - 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, - 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, - 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, - 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, - 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, 0x38120000, 0x38122000, - 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, - 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, - 0x3813C000, 0x3813E000, 0x38140000, 0x38142000, 0x38144000, 0x38146000, - 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, - 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, - 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, - 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, - 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, - 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, - 0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, - 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, - 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, - 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, - 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000, - 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, - 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, - 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, - 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, - 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, - 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, - 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, 0x38240000, 0x38242000, - 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, - 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, - 0x3825C000, 0x3825E000, 0x38260000, 0x38262000, 0x38264000, 0x38266000, - 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, - 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, - 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, - 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, - 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, - 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, - 0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, - 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, - 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, - 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, - 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000, - 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, - 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, - 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, - 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, - 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, - 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, - 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, - 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, 0x38360000, 0x38362000, - 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, - 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, - 0x3837C000, 0x3837E000, 0x38380000, 0x38382000, 0x38384000, 0x38386000, - 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, - 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, - 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, - 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, - 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, - 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, - 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, - 0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, - 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, - 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, - 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, - 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000, - 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, - 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, - 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, - 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, - 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, - 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, - 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, 0x38480000, 0x38482000, - 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, - 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, - 0x3849C000, 0x3849E000, 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, - 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, - 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, - 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, - 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, - 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, - 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, - 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, - 0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, - 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, - 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, - 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, - 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000, - 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, - 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, - 0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000, - 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, - 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, - 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, - 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, 0x385A0000, 0x385A2000, - 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, - 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, - 0x385BC000, 0x385BE000, 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, - 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, - 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, - 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, - 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, - 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, - 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, - 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, - 0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, - 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, - 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, - 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, - 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000, - 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, - 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, - 0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, - 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, - 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, - 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, - 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, - 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, 0x386C0000, 0x386C2000, - 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, - 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, - 0x386DC000, 0x386DE000, 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, - 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, - 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, - 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, - 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, - 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, - 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, - 0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, - 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, - 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, - 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, - 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000, - 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, - 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, - 0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, - 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, - 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, - 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, - 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, - 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, 0x387E0000, 0x387E2000, - 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, - 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, - 0x387FC000, 0x387FE000}; - static const uint32 exponent_table[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, - 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, - 0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000, - 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, - 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, - 0x0F000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000, - 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, - 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, - 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, - 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000}; - static const unsigned short offset_table[64] = { - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 0, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024}; - uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] + - exponent_table[value >> 10]; - // return *reinterpret_cast(&bits); - ////violating - // strict aliasing! - float out; - std::memcpy(&out, &bits, sizeof(float)); - return out; -} - -/// Convert half-precision to IEEE double-precision. -/// \param value binary representation of half-precision value -/// \return double-precision value -inline double half2float_impl(uint16 value, double, true_type) { - typedef bits::type uint32; - typedef bits::type uint64; - uint32 hi = static_cast(value & 0x8000) << 16; - int abs = value & 0x7FFF; - if (abs) { - hi |= 0x3F000000 << static_cast(abs >= 0x7C00); - for (; abs < 0x400; abs <<= 1, hi -= 0x100000) - ; - hi += static_cast(abs) << 10; - } - uint64 bits = static_cast(hi) << 32; - // return *reinterpret_cast(&bits); - ////violating - // strict aliasing! - double out; - std::memcpy(&out, &bits, sizeof(double)); - return out; -} - -/// Convert half-precision to non-IEEE floating point. -/// \tparam T type to convert to (builtin integer type) -/// \param value binary representation of half-precision value -/// \return floating point value -template T half2float_impl(uint16 value, T, ...) { - T out; - int abs = value & 0x7FFF; - if (abs > 0x7C00) - out = std::numeric_limits::has_quiet_NaN - ? std::numeric_limits::quiet_NaN() - : T(); - else if (abs == 0x7C00) - out = std::numeric_limits::has_infinity - ? std::numeric_limits::infinity() - : std::numeric_limits::max(); - else if (abs > 0x3FF) - out = std::ldexp(static_cast((abs & 0x3FF) | 0x400), (abs >> 10) - 25); - else - out = std::ldexp(static_cast(abs), -24); - return (value & 0x8000) ? -out : out; -} - -/// Convert half-precision to floating point. -/// \tparam T type to convert to (builtin integer type) -/// \param value binary representation of half-precision value -/// \return floating point value -template T half2float(uint16 value) { - return half2float_impl(value, T(), - bool_type < std::numeric_limits::is_iec559 && - sizeof(typename bits::type) == sizeof(T) > ()); -} - -/// Convert half-precision floating point to integer. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam E `true` for round to even, `false` for round away from zero -/// \tparam T type to convert to (buitlin integer type with at least 16 bits -/// precision, excluding any implicit sign -/// bits) \param value binary representation of half-precision value \return -/// integral value -template -T half2int_impl(uint16 value) { -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, - "half to int conversion only supports builtin integer types"); -#endif - uint32_t e = value & 0x7FFF; - if (e >= 0x7C00) - return (value & 0x8000) ? std::numeric_limits::min() - : std::numeric_limits::max(); - if (e < 0x3800) { - if (R == std::round_toward_infinity) - return T(~(value >> 15) & (e != 0)); - else if (R == std::round_toward_neg_infinity) - return -T(value > 0x8000); - return T(); - } - uint32_t m = (value & 0x3FF) | 0x400; - e >>= 10; - if (e < 25) { - if (R == std::round_to_nearest) - m += (1 << (24 - e)) - (~(m >> (25 - e)) & E); - else if (R == std::round_toward_infinity) - m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U); - else if (R == std::round_toward_neg_infinity) - m += -(value >> 15) & ((1 << (25 - e)) - 1U); - m >>= 25 - e; - } else - m <<= e - 25; - return (value & 0x8000) ? -static_cast(m) : static_cast(m); -} - -/// Convert half-precision floating point to integer. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam T type to convert to (buitlin integer type with at least 16 bits -/// precision, excluding any implicit sign -/// bits) \param value binary representation of half-precision value \return -/// integral value -template T half2int(uint16 value) { - return half2int_impl(value); -} - -/// Convert half-precision floating point to integer using -/// round-to-nearest-away-from-zero. -/// \tparam T type to convert to (buitlin integer type with at least 16 bits -/// precision, excluding any implicit sign -/// bits) \param value binary representation of half-precision value \return -/// integral value -template T half2int_up(uint16 value) { - return half2int_impl(value); -} - -/// Round half-precision number to nearest integer value. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \tparam E `true` for round to even, `false` for round away from zero -/// \param value binary representation of half-precision value -/// \return half-precision bits for nearest integral value -template -uint16 round_half_impl(uint16 value) { - uint32_t e = value & 0x7FFF; - uint16 result = value; - if (e < 0x3C00) { - result &= 0x8000; - if (R == std::round_to_nearest) - result |= 0x3C00U & -(e >= (0x3800 + E)); - else if (R == std::round_toward_infinity) - result |= 0x3C00U & -(~(value >> 15) & (e != 0)); - else if (R == std::round_toward_neg_infinity) - result |= 0x3C00U & -(value > 0x8000); - } else if (e < 0x6400) { - e = 25 - (e >> 10); - uint32_t mask = (1 << e) - 1; - if (R == std::round_to_nearest) - result += (1 << (e - 1)) - (~(result >> e) & E); - else if (R == std::round_toward_infinity) - result += mask & ((value >> 15) - 1); - else if (R == std::round_toward_neg_infinity) - result += mask & -(value >> 15); - result &= ~mask; - } - return result; -} - -/// Round half-precision number to nearest integer value. -/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest -/// rounding -/// \param value binary representation of half-precision value -/// \return half-precision bits for nearest integral value -template uint16 round_half(uint16 value) { - return round_half_impl(value); -} - -/// Round half-precision number to nearest integer value using -/// round-to-nearest-away-from-zero. -/// \param value binary representation of half-precision value -/// \return half-precision bits for nearest integral value -inline uint16 round_half_up(uint16 value) { - return round_half_impl(value); -} -/// \} - -struct functions; -template struct unary_specialized; -template struct binary_specialized; -template struct half_caster; -} // namespace detail - -/// Half-precision floating point type. -/// This class implements an IEEE-conformant half-precision floating point type -/// with the usual arithmetic operators and -/// conversions. It is implicitly convertible to single-precision floating -/// point, which makes artihmetic expressions and -/// functions with mixed-type operands to be of the most precise operand type. -/// Additionally all arithmetic operations -/// (and many mathematical functions) are carried out in single-precision -/// internally. All conversions from single- to -/// half-precision are done using the library's default rounding mode, but -/// temporary results inside chained arithmetic -/// expressions are kept in single-precision as long as possible (while of -/// course still maintaining a strong -/// half-precision type). -/// -/// According to the C++98/03 definition, the half type is not a POD type. But -/// according to C++11's less strict and -/// extended definitions it is both a standard layout type and a trivially -/// copyable type (even if not a POD type), which -/// means it can be standard-conformantly copied using raw binary copies. But in -/// this context some more words about the -/// actual size of the type. Although the half is representing an IEEE 16-bit -/// type, it does not neccessarily have to be -/// of exactly 16-bits size. But on any reasonable implementation the actual -/// binary representation of this type will -/// most probably not ivolve any additional "magic" or padding beyond the simple -/// binary representation of the underlying -/// 16-bit IEEE number, even if not strictly guaranteed by the standard. But -/// even then it only has an actual size of 16 -/// bits if your C++ implementation supports an unsigned integer type of exactly -/// 16 bits width. But this should be the -/// case on nearly any reasonable platform. -/// -/// So if your C++ implementation is not totally exotic or imposes special -/// alignment requirements, it is a reasonable -/// assumption that the data of a half is just comprised of the 2 bytes of the -/// underlying IEEE representation. -class half { - friend struct detail::functions; - friend struct detail::unary_specialized; - friend struct detail::binary_specialized; - template - friend struct detail::half_caster; - friend class std::numeric_limits; -#if HALF_ENABLE_CPP11_HASH - friend struct std::hash; -#endif -#if HALF_ENABLE_CPP11_USER_LITERALS - friend half literal::operator"" _h(long double); -#endif - - public: - /// Default constructor. - /// This initializes the half to 0. Although this does not match the builtin - /// types' default-initialization semantics - /// and may be less efficient than no initialization, it is needed to provide - /// proper value-initialization semantics. - HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - - /// Copy constructor. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - half(detail::expr rhs) - : data_(detail::float2half(static_cast(rhs))) {} - - /// Conversion constructor. - /// \param rhs float to convert - explicit half(float rhs) : data_(detail::float2half(rhs)) {} - - /// Conversion to single-precision. - /// \return single precision value representing expression value - operator float() const { return detail::half2float(data_); } - - /// Assignment operator. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - /// \return reference to this half - half& operator=(detail::expr rhs) { return *this = static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to add - /// \return reference to this half - template - typename detail::enable::type operator+=(T rhs) { - return *this += static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to subtract - /// \return reference to this half - template - typename detail::enable::type operator-=(T rhs) { - return *this -= static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to multiply with - /// \return reference to this half - template - typename detail::enable::type operator*=(T rhs) { - return *this *= static_cast(rhs); - } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to divide by - /// \return reference to this half - template - typename detail::enable::type operator/=(T rhs) { - return *this /= static_cast(rhs); - } - - /// Assignment operator. - /// \param rhs single-precision value to copy from - /// \return reference to this half - half& operator=(float rhs) { - data_ = detail::float2half(rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to add - /// \return reference to this half - half& operator+=(float rhs) { - data_ = - detail::float2half(detail::half2float(data_) + rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to subtract - /// \return reference to this half - half& operator-=(float rhs) { - data_ = - detail::float2half(detail::half2float(data_) - rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to multiply with - /// \return reference to this half - half& operator*=(float rhs) { - data_ = - detail::float2half(detail::half2float(data_) * rhs); - return *this; - } - - /// Arithmetic assignment. - /// \param rhs single-precision value to divide by - /// \return reference to this half - half& operator/=(float rhs) { - data_ = - detail::float2half(detail::half2float(data_) / rhs); - return *this; - } - - /// Prefix increment. - /// \return incremented half value - half& operator++() { return *this += 1.0f; } - - /// Prefix decrement. - /// \return decremented half value - half& operator--() { return *this -= 1.0f; } - - /// Postfix increment. - /// \return non-incremented half value - half operator++(int) { - half out(*this); - ++*this; - return out; - } - - /// Postfix decrement. - /// \return non-decremented half value - half operator--(int) { - half out(*this); - --*this; - return out; - } - - private: - /// Rounding mode to use - static const std::float_round_style round_style = - (std::float_round_style)(HALF_ROUND_STYLE); - - /// Constructor. - /// \param bits binary representation to set half to - HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT - : data_(bits) {} - - /// Internal binary representation - detail::uint16 data_; -}; - -#if HALF_ENABLE_CPP11_USER_LITERALS -namespace literal { -/// Half literal. -/// While this returns an actual half-precision value, half literals can -/// unfortunately not be constant expressions due -/// to rather involved conversions. -/// \param value literal value -/// \return half with given value (if representable) -inline half operator"" _h(long double value) { - return half(detail::binary, detail::float2half(value)); -} -} // namespace literal -#endif - -namespace detail { -/// Wrapper implementing unspecialized half-precision functions. -struct functions { - /// Addition implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision sum stored in single-precision - static expr plus(float x, float y) { return expr(x + y); } - - /// Subtraction implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision difference stored in single-precision - static expr minus(float x, float y) { return expr(x - y); } - - /// Multiplication implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision product stored in single-precision - static expr multiplies(float x, float y) { return expr(x * y); } - - /// Division implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision quotient stored in single-precision - static expr divides(float x, float y) { return expr(x / y); } - - /// Output implementation. - /// \param out stream to write to - /// \param arg value to write - /// \return reference to stream - template - static std::basic_ostream& - write(std::basic_ostream& out, float arg) { - return out << arg; - } - - /// Input implementation. - /// \param in stream to read from - /// \param arg half to read into - /// \return reference to stream - template - static std::basic_istream& - read(std::basic_istream& in, half& arg) { - float f; - if (in >> f) - arg = f; - return in; - } - - /// Modulo implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr fmod(float x, float y) { return expr(std::fmod(x, y)); } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr remainder(float x, float y) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::remainder(x, y)); -#else - if (builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - float ax = std::fabs(x), ay = std::fabs(y); - if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if (ay >= 65536.0f) - return expr(x); - if (ax == ay) - return expr(builtin_signbit(x) ? -0.0f : 0.0f); - ax = std::fmod(ax, ay + ay); - float y2 = 0.5f * ay; - if (ax > y2) { - ax -= ay; - if (ax >= y2) - ax -= ay; - } - return expr(builtin_signbit(x) ? -ax : ax); -#endif - } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \param quo address to store quotient bits at - /// \return Half-precision division remainder stored in single-precision - static expr remquo(float x, float y, int* quo) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::remquo(x, y, quo)); -#else - if (builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - bool sign = builtin_signbit(x), - qsign = static_cast(sign ^ builtin_signbit(y)); - float ax = std::fabs(x), ay = std::fabs(y); - if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if (ay >= 65536.0f) - return expr(x); - if (ax == ay) - return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); - ax = std::fmod(ax, 8.0f * ay); - int cquo = 0; - if (ax >= 4.0f * ay) { - ax -= 4.0f * ay; - cquo += 4; - } - if (ax >= 2.0f * ay) { - ax -= 2.0f * ay; - cquo += 2; - } - float y2 = 0.5f * ay; - if (ax > y2) { - ax -= ay; - ++cquo; - if (ax >= y2) { - ax -= ay; - ++cquo; - } - } - return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); -#endif - } - - /// Positive difference implementation. - /// \param x first operand - /// \param y second operand - /// \return Positive difference stored in single-precision - static expr fdim(float x, float y) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::fdim(x, y)); -#else - return expr((x <= y) ? 0.0f : (x - y)); -#endif - } - - /// Fused multiply-add implementation. - /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return \a x * \a y + \a z stored in single-precision - static expr fma(float x, float y, float z) { -#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) - return expr(std::fma(x, y, z)); -#else - return expr(x * y + z); -#endif - } - - /// Get NaN. - /// \return Half-precision quiet NaN - static half nanh() { return half(binary, 0x7FFF); } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp(float arg) { return expr(std::exp(arg)); } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr expm1(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::expm1(arg)); -#else - return expr(static_cast(std::exp(static_cast(arg)) - 1.0)); -#endif - } - - /// Binary exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp2(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::exp2(arg)); -#else - return expr( - static_cast(std::exp(arg * 0.69314718055994530941723212145818))); -#endif - } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log(float arg) { return expr(std::log(arg)); } - - /// Common logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log10(float arg) { return expr(std::log10(arg)); } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log1p(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::log1p(arg)); -#else - return expr(static_cast(std::log(1.0 + arg))); -#endif - } - - /// Binary logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log2(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::log2(arg)); -#else - return expr(static_cast(std::log(static_cast(arg)) * - 1.4426950408889634073599246810019)); -#endif - } - - /// Square root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sqrt(float arg) { return expr(std::sqrt(arg)); } - - /// Cubic root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cbrt(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::cbrt(arg)); -#else - if (builtin_isnan(arg) || builtin_isinf(arg)) - return expr(arg); - return expr(builtin_signbit(arg) - ? -static_cast( - std::pow(-static_cast(arg), 1.0 / 3.0)) - : static_cast( - std::pow(static_cast(arg), 1.0 / 3.0))); -#endif - } - - /// Hypotenuse implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr hypot(float x, float y) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::hypot(x, y)); -#else - return expr( - (builtin_isinf(x) || builtin_isinf(y)) - ? std::numeric_limits::infinity() - : static_cast(std::sqrt(static_cast(x) * x + - static_cast(y) * y))); -#endif - } - - /// Power implementation. - /// \param base value to exponentiate - /// \param exp power to expontiate to - /// \return function value stored in single-preicision - static expr pow(float base, float exp) { return expr(std::pow(base, exp)); } - - /// Sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sin(float arg) { return expr(std::sin(arg)); } - - /// Cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cos(float arg) { return expr(std::cos(arg)); } - - /// Tan implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tan(float arg) { return expr(std::tan(arg)); } - - /// Arc sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr asin(float arg) { return expr(std::asin(arg)); } - - /// Arc cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acos(float arg) { return expr(std::acos(arg)); } - - /// Arc tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atan(float arg) { return expr(std::atan(arg)); } - - /// Arc tangent implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr atan2(float x, float y) { return expr(std::atan2(x, y)); } - - /// Hyperbolic sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sinh(float arg) { return expr(std::sinh(arg)); } - - /// Hyperbolic cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cosh(float arg) { return expr(std::cosh(arg)); } - - /// Hyperbolic tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tanh(float arg) { return expr(std::tanh(arg)); } - - /// Hyperbolic area sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr asinh(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::asinh(arg)); -#else - return expr( - (arg == -std::numeric_limits::infinity()) - ? arg - : static_cast(std::log(arg + std::sqrt(arg * arg + 1.0)))); -#endif - } - - /// Hyperbolic area cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acosh(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::acosh(arg)); -#else - return expr((arg < -1.0f) ? std::numeric_limits::quiet_NaN() - : static_cast(std::log( - arg + std::sqrt(arg * arg - 1.0)))); -#endif - } - - /// Hyperbolic area tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atanh(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::atanh(arg)); -#else - return expr(static_cast(0.5 * std::log((1.0 + arg) / (1.0 - arg)))); -#endif - } - - /// Error function implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erf(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::erf(arg)); -#else - return expr(static_cast(erf(static_cast(arg)))); -#endif - } - - /// Complementary implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erfc(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::erfc(arg)); -#else - return expr(static_cast(1.0 - erf(static_cast(arg)))); -#endif - } - - /// Gamma logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr lgamma(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::lgamma(arg)); -#else - if (builtin_isinf(arg)) - return expr(std::numeric_limits::infinity()); - if (arg < 0.0f) { - float i, f = std::modf(-arg, &i); - if (f == 0.0f) - return expr(std::numeric_limits::infinity()); - return expr(static_cast( - 1.1447298858494001741434273513531 - - std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) - - lgamma(1.0 - arg))); - } - return expr(static_cast(lgamma(static_cast(arg)))); -#endif - } - - /// Gamma implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tgamma(float arg) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::tgamma(arg)); -#else - if (arg == 0.0f) - return builtin_signbit(arg) - ? expr(-std::numeric_limits::infinity()) - : expr(std::numeric_limits::infinity()); - if (arg < 0.0f) { - float i, f = std::modf(-arg, &i); - if (f == 0.0f) - return expr(std::numeric_limits::quiet_NaN()); - double value = 3.1415926535897932384626433832795 / - (std::sin(3.1415926535897932384626433832795 * f) * - std::exp(lgamma(1.0 - arg))); - return expr( - static_cast((std::fmod(i, 2.0f) == 0.0f) ? -value : value)); - } - if (builtin_isinf(arg)) - return expr(arg); - return expr(static_cast(std::exp(lgamma(static_cast(arg))))); -#endif - } - - /// Floor implementation. - /// \param arg value to round - /// \return rounded value - static half floor(half arg) { - return half(binary, round_half(arg.data_)); - } - - /// Ceiling implementation. - /// \param arg value to round - /// \return rounded value - static half ceil(half arg) { - return half(binary, round_half(arg.data_)); - } - - /// Truncation implementation. - /// \param arg value to round - /// \return rounded value - static half trunc(half arg) { - return half(binary, round_half(arg.data_)); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static half round(half arg) { return half(binary, round_half_up(arg.data_)); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lround(half arg) { return detail::half2int_up(arg.data_); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static half rint(half arg) { - return half(binary, round_half(arg.data_)); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lrint(half arg) { - return detail::half2int(arg.data_); - } - -#if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llround(half arg) { - return detail::half2int_up(arg.data_); - } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llrint(half arg) { - return detail::half2int(arg.data_); - } -#endif - - /// Decompression implementation. - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return normalized significant - static half frexp(half arg, int* exp) { - int m = arg.data_ & 0x7FFF, e = -14; - if (m >= 0x7C00 || !m) - return *exp = 0, arg; - for (; m < 0x400; m <<= 1, --e) - ; - return *exp = e + (m >> 10), - half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF)); - } - - /// Decompression implementation. - /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part - static half modf(half arg, half* iptr) { - uint32_t e = arg.data_ & 0x7FFF; - if (e >= 0x6400) - return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00))); - if (e < 0x3C00) - return iptr->data_ = arg.data_ & 0x8000, arg; - e >>= 10; - uint32_t mask = (1 << (25 - e)) - 1, m = arg.data_ & mask; - iptr->data_ = arg.data_ & ~mask; - if (!m) - return half(binary, arg.data_ & 0x8000); - for (; m < 0x400; m <<= 1, --e) - ; - return half(binary, static_cast((arg.data_ & 0x8000) | (e << 10) | - (m & 0x3FF))); - } - - /// Scaling implementation. - /// \param arg number to scale - /// \param exp power of two to scale by - /// \return scaled number - static half scalbln(half arg, long exp) { - uint32_t m = arg.data_ & 0x7FFF; - if (m >= 0x7C00 || !m) - return arg; - for (; m < 0x400; m <<= 1, --exp) - ; - exp += m >> 10; - uint16 value = arg.data_ & 0x8000; - if (exp > 30) { - if (half::round_style == std::round_toward_zero) - value |= 0x7BFF; - else if (half::round_style == std::round_toward_infinity) - value |= 0x7C00 - (value >> 15); - else if (half::round_style == std::round_toward_neg_infinity) - value |= 0x7BFF + (value >> 15); - else - value |= 0x7C00; - } else if (exp > 0) - value |= (exp << 10) | (m & 0x3FF); - else if (exp > -11) { - m = (m & 0x3FF) | 0x400; - if (half::round_style == std::round_to_nearest) { - m += 1 << -exp; -#if HALF_ROUND_TIES_TO_EVEN - m -= (m >> (1 - exp)) & 1; -#endif - } else if (half::round_style == std::round_toward_infinity) - m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U); - else if (half::round_style == std::round_toward_neg_infinity) - m += -(value >> 15) & ((1 << (1 - exp)) - 1U); - value |= m >> (1 - exp); - } else if (half::round_style == std::round_toward_infinity) - value -= (value >> 15) - 1; - else if (half::round_style == std::round_toward_neg_infinity) - value += value >> 15; - return half(binary, value); - } - - /// Exponent implementation. - /// \param arg number to query - /// \return floating point exponent - static int ilogb(half arg) { - int abs = arg.data_ & 0x7FFF; - if (!abs) - return FP_ILOGB0; - if (abs < 0x7C00) { - int exp = (abs >> 10) - 15; - if (abs < 0x400) - for (; abs < 0x200; abs <<= 1, --exp) - ; - return exp; - } - if (abs > 0x7C00) - return FP_ILOGBNAN; - return INT_MAX; - } - - /// Exponent implementation. - /// \param arg number to query - /// \return floating point exponent - static half logb(half arg) { - int abs = arg.data_ & 0x7FFF; - if (!abs) - return half(binary, 0xFC00); - if (abs < 0x7C00) { - int exp = (abs >> 10) - 15; - if (abs < 0x400) - for (; abs < 0x200; abs <<= 1, --exp) - ; - uint16 bits = (exp < 0) << 15; - if (exp) { - uint32_t m = std::abs(exp) << 6, e = 18; - for (; m < 0x400; m <<= 1, --e) - ; - bits |= (e << 10) + m; - } - return half(binary, bits); - } - if (abs > 0x7C00) - return arg; - return half(binary, 0x7C00); - } - - /// Enumeration implementation. - /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nextafter(half from, half to) { - uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; - if (fabs > 0x7C00) - return from; - if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs)) - return to; - if (!fabs) - return half(binary, (to.data_ & 0x8000) + 1); - bool lt = - ((fabs == from.data_) ? static_cast(fabs) - : -static_cast(fabs)) < - ((tabs == to.data_) ? static_cast(tabs) : -static_cast(tabs)); - return half(binary, - from.data_ + - (((from.data_ >> 15) ^ static_cast(lt)) << 1) - - 1); - } - - /// Enumeration implementation. - /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nexttoward(half from, long double to) { - if (isnan(from)) - return from; - long double lfrom = static_cast(from); - if (builtin_isnan(to) || lfrom == to) - return half(static_cast(to)); - if (!(from.data_ & 0x7FFF)) - return half(binary, - (static_cast(builtin_signbit(to)) << 15) + 1); - return half( - binary, - from.data_ + - (((from.data_ >> 15) ^ static_cast(lfrom < to)) << 1) - - 1); - } - - /// Sign implementation - /// \param x first operand - /// \param y second operand - /// \return composed value - static half copysign(half x, half y) { - return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000)); - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if infinite number - /// \retval false else - static int fpclassify(half arg) { - uint32_t abs = arg.data_ & 0x7FFF; - return abs ? ((abs > 0x3FF) ? ((abs >= 0x7C00) - ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE) - : FP_NORMAL) - : FP_SUBNORMAL) - : FP_ZERO; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if finite number - /// \retval false else - static bool isfinite(half arg) { return (arg.data_ & 0x7C00) != 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if infinite number - /// \retval false else - static bool isinf(half arg) { return (arg.data_ & 0x7FFF) == 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if not a number - /// \retval false else - static bool isnan(half arg) { return (arg.data_ & 0x7FFF) > 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if normal number - /// \retval false else - static bool isnormal(half arg) { - return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00); - } - - /// Sign bit implementation. - /// \param arg value to check - /// \retval true if signed - /// \retval false if unsigned - static bool signbit(half arg) { return (arg.data_ & 0x8000) != 0; } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - static bool isequal(half x, half y) { - return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - static bool isnotequal(half x, half y) { - return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x > \a y - /// \retval false else - static bool isgreater(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 && - (((xabs == x.data_) ? xabs : -xabs) > - ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x >= \a y - /// \retval false else - static bool isgreaterequal(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 && - (((xabs == x.data_) ? xabs : -xabs) >= - ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x < \a y - /// \retval false else - static bool isless(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 && - (((xabs == x.data_) ? xabs : -xabs) < - ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x <= \a y - /// \retval false else - static bool islessequal(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs <= 0x7C00 && yabs <= 0x7C00 && - (((xabs == x.data_) ? xabs : -xabs) <= - ((yabs == y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if either \a x > \a y nor \a x < \a y - /// \retval false else - static bool islessgreater(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if (xabs > 0x7C00 || yabs > 0x7C00) - return false; - int a = (xabs == x.data_) ? xabs : -xabs, - b = (yabs == y.data_) ? yabs : -yabs; - return a < b || a > b; - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operand unordered - /// \retval false else - static bool isunordered(half x, half y) { return isnan(x) || isnan(y); } - - private: - static double erf(double arg) { - if (builtin_isinf(arg)) - return (arg < 0.0) ? -1.0 : 1.0; - double x2 = arg * arg, ax2 = 0.147 * x2, - value = std::sqrt( - 1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) / - (1.0 + ax2))); - return builtin_signbit(arg) ? -value : value; - } - - static double lgamma(double arg) { - double v = 1.0; - for (; arg < 8.0; ++arg) - v *= arg; - double w = 1.0 / (arg * arg); - return (((((((-0.02955065359477124183006535947712 * w + - 0.00641025641025641025641025641026) * - w + - -0.00191752691752691752691752691753) * - w + - 8.4175084175084175084175084175084e-4) * - w + - -5.952380952380952380952380952381e-4) * - w + - 7.9365079365079365079365079365079e-4) * - w + - -0.00277777777777777777777777777778) * - w + - 0.08333333333333333333333333333333) / - arg + - 0.91893853320467274178032973640562 - std::log(v) - arg + - (arg - 0.5) * std::log(arg); - } -}; - -/// Wrapper for unary half-precision functions needing specialization for -/// individual argument types. -/// \tparam T argument type -template struct unary_specialized { - /// Negation implementation. - /// \param arg value to negate - /// \return negated value - static HALF_CONSTEXPR half negate(half arg) { - return half(binary, arg.data_ ^ 0x8000); - } - - /// Absolute value implementation. - /// \param arg function argument - /// \return absolute value - static half fabs(half arg) { return half(binary, arg.data_ & 0x7FFF); } -}; -template <> struct unary_specialized { - static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); } - static expr fabs(float arg) { return expr(std::fabs(arg)); } -}; - -/// Wrapper for binary half-precision functions needing specialization for -/// individual argument types. -/// \tparam T first argument type -/// \tparam U first argument type -template struct binary_specialized { - /// Minimum implementation. - /// \param x first operand - /// \param y second operand - /// \return minimum value - static expr fmin(float x, float y) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::fmin(x, y)); -#else - if (builtin_isnan(x)) - return expr(y); - if (builtin_isnan(y)) - return expr(x); - return expr(std::min(x, y)); -#endif - } - - /// Maximum implementation. - /// \param x first operand - /// \param y second operand - /// \return maximum value - static expr fmax(float x, float y) { -#if HALF_ENABLE_CPP11_CMATH - return expr(std::fmax(x, y)); -#else - if (builtin_isnan(x)) - return expr(y); - if (builtin_isnan(y)) - return expr(x); - return expr(std::max(x, y)); -#endif - } -}; -template <> struct binary_specialized { - static half fmin(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if (xabs > 0x7C00) - return y; - if (yabs > 0x7C00) - return x; - return (((xabs == x.data_) ? xabs : -xabs) > - ((yabs == y.data_) ? yabs : -yabs)) - ? y - : x; - } - static half fmax(half x, half y) { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if (xabs > 0x7C00) - return y; - if (yabs > 0x7C00) - return x; - return (((xabs == x.data_) ? xabs : -xabs) < - ((yabs == y.data_) ? yabs : -yabs)) - ? y - : x; - } -}; - -/// Helper class for half casts. -/// This class template has to be specialized for all valid cast argument to -/// define an appropriate static `cast` member -/// function and a corresponding `type` member denoting its return type. -/// \tparam T destination type -/// \tparam U source type -/// \tparam R rounding mode to use -template -struct half_caster {}; -template struct half_caster { -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, - "half_cast from non-arithmetic type unsupported"); -#endif - - static half cast(U arg) { return cast_impl(arg, is_float()); }; - - private: - static half cast_impl(U arg, true_type) { - return half(binary, float2half(arg)); - } - static half cast_impl(U arg, false_type) { - return half(binary, int2half(arg)); - } -}; -template struct half_caster { -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, - "half_cast to non-arithmetic type unsupported"); -#endif - - static T cast(half arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(half arg, true_type) { return half2float(arg.data_); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } -}; -template struct half_caster { -#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, - "half_cast to non-arithmetic type unsupported"); -#endif - - static T cast(expr arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(float arg, true_type) { return static_cast(arg); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } -}; -template struct half_caster { - static half cast(half arg) { return arg; } -}; -template -struct half_caster : half_caster {}; - -/// \name Comparison operators -/// \{ - -/// Comparison for equality. -/// \param x first operand -/// \param y second operand -/// \retval true if operands equal -/// \retval false else -template -typename enable::type operator==(T x, U y) { - return functions::isequal(x, y); -} - -/// Comparison for inequality. -/// \param x first operand -/// \param y second operand -/// \retval true if operands not equal -/// \retval false else -template -typename enable::type operator!=(T x, U y) { - return functions::isnotequal(x, y); -} - -/// Comparison for less than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less than \a y -/// \retval false else -template -typename enable::type operator<(T x, U y) { - return functions::isless(x, y); -} - -/// Comparison for greater than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater than \a y -/// \retval false else -template -typename enable::type operator>(T x, U y) { - return functions::isgreater(x, y); -} - -/// Comparison for less equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less equal \a y -/// \retval false else -template -typename enable::type operator<=(T x, U y) { - return functions::islessequal(x, y); -} - -/// Comparison for greater equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater equal \a y -/// \retval false else -template -typename enable::type operator>=(T x, U y) { - return functions::isgreaterequal(x, y); -} - -/// \} -/// \name Arithmetic operators -/// \{ - -/// Add halfs. -/// \param x left operand -/// \param y right operand -/// \return sum of half expressions -template -typename enable::type operator+(T x, U y) { - return functions::plus(x, y); -} - -/// Subtract halfs. -/// \param x left operand -/// \param y right operand -/// \return difference of half expressions -template -typename enable::type operator-(T x, U y) { - return functions::minus(x, y); -} - -/// Multiply halfs. -/// \param x left operand -/// \param y right operand -/// \return product of half expressions -template -typename enable::type operator*(T x, U y) { - return functions::multiplies(x, y); -} - -/// Divide halfs. -/// \param x left operand -/// \param y right operand -/// \return quotient of half expressions -template -typename enable::type operator/(T x, U y) { - return functions::divides(x, y); -} - -/// Identity. -/// \param arg operand -/// \return uncahnged operand -template -HALF_CONSTEXPR typename enable::type operator+(T arg) { - return arg; -} - -/// Negation. -/// \param arg operand -/// \return negated operand -template -HALF_CONSTEXPR typename enable::type operator-(T arg) { - return unary_specialized::negate(arg); -} - -/// \} -/// \name Input and output -/// \{ - -/// Output operator. -/// \param out output stream to write into -/// \param arg half expression to write -/// \return reference to output stream -template -typename enable&, T>::type -operator<<(std::basic_ostream& out, T arg) { - return functions::write(out, arg); -} - -/// Input operator. -/// \param in input stream to read from -/// \param arg half to read into -/// \return reference to input stream -template -std::basic_istream& -operator>>(std::basic_istream& in, half& arg) { - return functions::read(in, arg); -} - -/// \} -/// \name Basic mathematical operations -/// \{ - -/// Absolute value. -/// \param arg operand -/// \return absolute value of \a arg -// template typename enable::type abs(T arg) { -// return unary_specialized::fabs(arg); } -inline half abs(half arg) { return unary_specialized::fabs(arg); } -inline expr abs(expr arg) { return unary_specialized::fabs(arg); } - -/// Absolute value. -/// \param arg operand -/// \return absolute value of \a arg -// template typename enable::type fabs(T arg) { -// return unary_specialized::fabs(arg); } -inline half fabs(half arg) { return unary_specialized::fabs(arg); } -inline expr fabs(expr arg) { return unary_specialized::fabs(arg); } - -/// Remainder of division. -/// \param x first operand -/// \param y second operand -/// \return remainder of floating point division. -// template typename enable::type -// fmod(T x, U y) { return functions::fmod(x, y); } -inline expr fmod(half x, half y) { return functions::fmod(x, y); } -inline expr fmod(half x, expr y) { return functions::fmod(x, y); } -inline expr fmod(expr x, half y) { return functions::fmod(x, y); } -inline expr fmod(expr x, expr y) { return functions::fmod(x, y); } - -/// Remainder of division. -/// \param x first operand -/// \param y second operand -/// \return remainder of floating point division. -// template typename enable::type -// remainder(T x, U y) { return -// functions::remainder(x, y); } -inline expr remainder(half x, half y) { return functions::remainder(x, y); } -inline expr remainder(half x, expr y) { return functions::remainder(x, y); } -inline expr remainder(expr x, half y) { return functions::remainder(x, y); } -inline expr remainder(expr x, expr y) { return functions::remainder(x, y); } - -/// Remainder of division. -/// \param x first operand -/// \param y second operand -/// \param quo address to store some bits of quotient at -/// \return remainder of floating point division. -// template typename enable::type -// remquo(T x, U y, int *quo) { return -// functions::remquo(x, y, quo); } -inline expr remquo(half x, half y, int* quo) { - return functions::remquo(x, y, quo); -} -inline expr remquo(half x, expr y, int* quo) { - return functions::remquo(x, y, quo); -} -inline expr remquo(expr x, half y, int* quo) { - return functions::remquo(x, y, quo); -} -inline expr remquo(expr x, expr y, int* quo) { - return functions::remquo(x, y, quo); -} - -/// Fused multiply add. -/// \param x first operand -/// \param y second operand -/// \param z third operand -/// \return ( \a x * \a y ) + \a z rounded as one operation. -// template typename -// enable::type fma(T x, U y, V z) { return -// functions::fma(x, y, z); } -inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); } -inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); } -inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); } -inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); } -inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); } -inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); } -inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); } -inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); } - -/// Maximum of half expressions. -/// \param x first operand -/// \param y second operand -/// \return maximum of operands -// template typename result::type -// fmax(T -// x, U y) { return -// binary_specialized::fmax(x, y); } -inline half fmax(half x, half y) { - return binary_specialized::fmax(x, y); -} -inline expr fmax(half x, expr y) { - return binary_specialized::fmax(x, y); -} -inline expr fmax(expr x, half y) { - return binary_specialized::fmax(x, y); -} -inline expr fmax(expr x, expr y) { - return binary_specialized::fmax(x, y); -} - -/// Minimum of half expressions. -/// \param x first operand -/// \param y second operand -/// \return minimum of operands -// template typename result::type -// fmin(T -// x, U y) { return -// binary_specialized::fmin(x, y); } -inline half fmin(half x, half y) { - return binary_specialized::fmin(x, y); -} -inline expr fmin(half x, expr y) { - return binary_specialized::fmin(x, y); -} -inline expr fmin(expr x, half y) { - return binary_specialized::fmin(x, y); -} -inline expr fmin(expr x, expr y) { - return binary_specialized::fmin(x, y); -} - -/// Positive difference. -/// \param x first operand -/// \param y second operand -/// \return \a x - \a y or 0 if difference negative -// template typename enable::type -// fdim(T x, U y) { return functions::fdim(x, y); } -inline expr fdim(half x, half y) { return functions::fdim(x, y); } -inline expr fdim(half x, expr y) { return functions::fdim(x, y); } -inline expr fdim(expr x, half y) { return functions::fdim(x, y); } -inline expr fdim(expr x, expr y) { return functions::fdim(x, y); } - -/// Get NaN value. -/// \return quiet NaN -inline half nanh(const char*) { return functions::nanh(); } - -/// \} -/// \name Exponential functions -/// \{ - -/// Exponential function. -/// \param arg function argument -/// \return e raised to \a arg -// template typename enable::type exp(T arg) { -// return functions::exp(arg); } -inline expr exp(half arg) { return functions::exp(arg); } -inline expr exp(expr arg) { return functions::exp(arg); } - -/// Exponential minus one. -/// \param arg function argument -/// \return e raised to \a arg subtracted by 1 -// template typename enable::type expm1(T arg) -//{ -// return functions::expm1(arg); } -inline expr expm1(half arg) { return functions::expm1(arg); } -inline expr expm1(expr arg) { return functions::expm1(arg); } - -/// Binary exponential. -/// \param arg function argument -/// \return 2 raised to \a arg -// template typename enable::type exp2(T arg) { -// return functions::exp2(arg); } -inline expr exp2(half arg) { return functions::exp2(arg); } -inline expr exp2(expr arg) { return functions::exp2(arg); } - -/// Natural logorithm. -/// \param arg function argument -/// \return logarithm of \a arg to base e -// template typename enable::type log(T arg) { -// return functions::log(arg); } -inline expr log(half arg) { return functions::log(arg); } -inline expr log(expr arg) { return functions::log(arg); } - -/// Common logorithm. -/// \param arg function argument -/// \return logarithm of \a arg to base 10 -// template typename enable::type log10(T arg) -//{ -// return functions::log10(arg); } -inline expr log10(half arg) { return functions::log10(arg); } -inline expr log10(expr arg) { return functions::log10(arg); } - -/// Natural logorithm. -/// \param arg function argument -/// \return logarithm of \a arg plus 1 to base e -// template typename enable::type log1p(T arg) -//{ -// return functions::log1p(arg); } -inline expr log1p(half arg) { return functions::log1p(arg); } -inline expr log1p(expr arg) { return functions::log1p(arg); } - -/// Binary logorithm. -/// \param arg function argument -/// \return logarithm of \a arg to base 2 -// template typename enable::type log2(T arg) { -// return functions::log2(arg); } -inline expr log2(half arg) { return functions::log2(arg); } -inline expr log2(expr arg) { return functions::log2(arg); } - -/// \} -/// \name Power functions -/// \{ - -/// Square root. -/// \param arg function argument -/// \return square root of \a arg -// template typename enable::type sqrt(T arg) { -// return functions::sqrt(arg); } -inline expr sqrt(half arg) { return functions::sqrt(arg); } -inline expr sqrt(expr arg) { return functions::sqrt(arg); } - -/// Cubic root. -/// \param arg function argument -/// \return cubic root of \a arg -// template typename enable::type cbrt(T arg) { -// return functions::cbrt(arg); } -inline expr cbrt(half arg) { return functions::cbrt(arg); } -inline expr cbrt(expr arg) { return functions::cbrt(arg); } - -/// Hypotenuse function. -/// \param x first argument -/// \param y second argument -/// \return square root of sum of squares without internal over- or underflows -// template typename enable::type -// hypot(T x, U y) { return functions::hypot(x, y); -//} -inline expr hypot(half x, half y) { return functions::hypot(x, y); } -inline expr hypot(half x, expr y) { return functions::hypot(x, y); } -inline expr hypot(expr x, half y) { return functions::hypot(x, y); } -inline expr hypot(expr x, expr y) { return functions::hypot(x, y); } - -/// Power function. -/// \param base first argument -/// \param exp second argument -/// \return \a base raised to \a exp -// template typename enable::type -// pow(T base, U exp) { return functions::pow(base, -// exp); } -inline expr pow(half base, half exp) { return functions::pow(base, exp); } -inline expr pow(half base, expr exp) { return functions::pow(base, exp); } -inline expr pow(expr base, half exp) { return functions::pow(base, exp); } -inline expr pow(expr base, expr exp) { return functions::pow(base, exp); } - -/// \} -/// \name Trigonometric functions -/// \{ - -/// Sine function. -/// \param arg function argument -/// \return sine value of \a arg -// template typename enable::type sin(T arg) { -// return functions::sin(arg); } -inline expr sin(half arg) { return functions::sin(arg); } -inline expr sin(expr arg) { return functions::sin(arg); } - -/// Cosine function. -/// \param arg function argument -/// \return cosine value of \a arg -// template typename enable::type cos(T arg) { -// return functions::cos(arg); } -inline expr cos(half arg) { return functions::cos(arg); } -inline expr cos(expr arg) { return functions::cos(arg); } - -/// Tangent function. -/// \param arg function argument -/// \return tangent value of \a arg -// template typename enable::type tan(T arg) { -// return functions::tan(arg); } -inline expr tan(half arg) { return functions::tan(arg); } -inline expr tan(expr arg) { return functions::tan(arg); } - -/// Arc sine. -/// \param arg function argument -/// \return arc sine value of \a arg -// template typename enable::type asin(T arg) { -// return functions::asin(arg); } -inline expr asin(half arg) { return functions::asin(arg); } -inline expr asin(expr arg) { return functions::asin(arg); } - -/// Arc cosine function. -/// \param arg function argument -/// \return arc cosine value of \a arg -// template typename enable::type acos(T arg) { -// return functions::acos(arg); } -inline expr acos(half arg) { return functions::acos(arg); } -inline expr acos(expr arg) { return functions::acos(arg); } - -/// Arc tangent function. -/// \param arg function argument -/// \return arc tangent value of \a arg -// template typename enable::type atan(T arg) { -// return functions::atan(arg); } -inline expr atan(half arg) { return functions::atan(arg); } -inline expr atan(expr arg) { return functions::atan(arg); } - -/// Arc tangent function. -/// \param x first argument -/// \param y second argument -/// \return arc tangent value -// template typename enable::type -// atan2(T x, U y) { return functions::atan2(x, y); -//} -inline expr atan2(half x, half y) { return functions::atan2(x, y); } -inline expr atan2(half x, expr y) { return functions::atan2(x, y); } -inline expr atan2(expr x, half y) { return functions::atan2(x, y); } -inline expr atan2(expr x, expr y) { return functions::atan2(x, y); } - -/// \} -/// \name Hyperbolic functions -/// \{ - -/// Hyperbolic sine. -/// \param arg function argument -/// \return hyperbolic sine value of \a arg -// template typename enable::type sinh(T arg) { -// return functions::sinh(arg); } -inline expr sinh(half arg) { return functions::sinh(arg); } -inline expr sinh(expr arg) { return functions::sinh(arg); } - -/// Hyperbolic cosine. -/// \param arg function argument -/// \return hyperbolic cosine value of \a arg -// template typename enable::type cosh(T arg) { -// return functions::cosh(arg); } -inline expr cosh(half arg) { return functions::cosh(arg); } -inline expr cosh(expr arg) { return functions::cosh(arg); } - -/// Hyperbolic tangent. -/// \param arg function argument -/// \return hyperbolic tangent value of \a arg -// template typename enable::type tanh(T arg) { -// return functions::tanh(arg); } -inline expr tanh(half arg) { return functions::tanh(arg); } -inline expr tanh(expr arg) { return functions::tanh(arg); } - -/// Hyperbolic area sine. -/// \param arg function argument -/// \return area sine value of \a arg -// template typename enable::type asinh(T arg) -//{ -// return functions::asinh(arg); } -inline expr asinh(half arg) { return functions::asinh(arg); } -inline expr asinh(expr arg) { return functions::asinh(arg); } - -/// Hyperbolic area cosine. -/// \param arg function argument -/// \return area cosine value of \a arg -// template typename enable::type acosh(T arg) -//{ -// return functions::acosh(arg); } -inline expr acosh(half arg) { return functions::acosh(arg); } -inline expr acosh(expr arg) { return functions::acosh(arg); } - -/// Hyperbolic area tangent. -/// \param arg function argument -/// \return area tangent value of \a arg -// template typename enable::type atanh(T arg) -//{ -// return functions::atanh(arg); } -inline expr atanh(half arg) { return functions::atanh(arg); } -inline expr atanh(expr arg) { return functions::atanh(arg); } - -/// \} -/// \name Error and gamma functions -/// \{ - -/// Error function. -/// \param arg function argument -/// \return error function value of \a arg -// template typename enable::type erf(T arg) { -// return functions::erf(arg); } -inline expr erf(half arg) { return functions::erf(arg); } -inline expr erf(expr arg) { return functions::erf(arg); } - -/// Complementary error function. -/// \param arg function argument -/// \return 1 minus error function value of \a arg -// template typename enable::type erfc(T arg) { -// return functions::erfc(arg); } -inline expr erfc(half arg) { return functions::erfc(arg); } -inline expr erfc(expr arg) { return functions::erfc(arg); } - -/// Natural logarithm of gamma function. -/// \param arg function argument -/// \return natural logarith of gamma function for \a arg -// template typename enable::type lgamma(T arg) -//{ -// return functions::lgamma(arg); } -inline expr lgamma(half arg) { return functions::lgamma(arg); } -inline expr lgamma(expr arg) { return functions::lgamma(arg); } - -/// Gamma function. -/// \param arg function argument -/// \return gamma function value of \a arg -// template typename enable::type tgamma(T arg) -//{ -// return functions::tgamma(arg); } -inline expr tgamma(half arg) { return functions::tgamma(arg); } -inline expr tgamma(expr arg) { return functions::tgamma(arg); } - -/// \} -/// \name Rounding -/// \{ - -/// Nearest integer not less than half value. -/// \param arg half to round -/// \return nearest integer not less than \a arg -// template typename enable::type ceil(T arg) { -// return functions::ceil(arg); } -inline half ceil(half arg) { return functions::ceil(arg); } -inline half ceil(expr arg) { return functions::ceil(arg); } - -/// Nearest integer not greater than half value. -/// \param arg half to round -/// \return nearest integer not greater than \a arg -// template typename enable::type floor(T arg) -//{ -// return functions::floor(arg); } -inline half floor(half arg) { return functions::floor(arg); } -inline half floor(expr arg) { return functions::floor(arg); } - -/// Nearest integer not greater in magnitude than half value. -/// \param arg half to round -/// \return nearest integer not greater in magnitude than \a arg -// template typename enable::type trunc(T arg) -//{ -// return functions::trunc(arg); } -inline half trunc(half arg) { return functions::trunc(arg); } -inline half trunc(expr arg) { return functions::trunc(arg); } - -/// Nearest integer. -/// \param arg half to round -/// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type round(T arg) -//{ -// return functions::round(arg); } -inline half round(half arg) { return functions::round(arg); } -inline half round(expr arg) { return functions::round(arg); } - -/// Nearest integer. -/// \param arg half to round -/// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type lround(T arg) -//{ -// return functions::lround(arg); } -inline long lround(half arg) { return functions::lround(arg); } -inline long lround(expr arg) { return functions::lround(arg); } - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type nearbyint(T -// arg) { return functions::nearbyint(arg); } -inline half nearbyint(half arg) { return functions::rint(arg); } -inline half nearbyint(expr arg) { return functions::rint(arg); } - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type rint(T arg) { -// return functions::rint(arg); } -inline half rint(half arg) { return functions::rint(arg); } -inline half rint(expr arg) { return functions::rint(arg); } - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type lrint(T arg) -//{ -// return functions::lrint(arg); } -inline long lrint(half arg) { return functions::lrint(arg); } -inline long lrint(expr arg) { return functions::lrint(arg); } -#if HALF_ENABLE_CPP11_LONG_LONG -/// Nearest integer. -/// \param arg half to round -/// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type -// llround(T -// arg) { return functions::llround(arg); } -inline long long llround(half arg) { return functions::llround(arg); } -inline long long llround(expr arg) { return functions::llround(arg); } - -/// Nearest integer using half's internal rounding mode. -/// \param arg half expression to round -/// \return nearest integer using default rounding mode -// template typename enable::type llrint(T -// arg) { return functions::llrint(arg); } -inline long long llrint(half arg) { return functions::llrint(arg); } -inline long long llrint(expr arg) { return functions::llrint(arg); } -#endif - -/// \} -/// \name Floating point manipulation -/// \{ - -/// Decompress floating point number. -/// \param arg number to decompress -/// \param exp address to store exponent at -/// \return significant in range [0.5, 1) -// template typename enable::type frexp(T arg, -// int *exp) { return functions::frexp(arg, exp); } -inline half frexp(half arg, int* exp) { return functions::frexp(arg, exp); } -inline half frexp(expr arg, int* exp) { return functions::frexp(arg, exp); } - -/// Multiply by power of two. -/// \param arg number to modify -/// \param exp power of two to multiply with -/// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type ldexp(T arg, -// int exp) { return functions::scalbln(arg, exp); -//} -inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); } -inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); } - -/// Extract integer and fractional parts. -/// \param arg number to decompress -/// \param iptr address to store integer part at -/// \return fractional part -// template typename enable::type modf(T arg, -// half *iptr) { return functions::modf(arg, iptr); -//} -inline half modf(half arg, half* iptr) { return functions::modf(arg, iptr); } -inline half modf(expr arg, half* iptr) { return functions::modf(arg, iptr); } - -/// Multiply by power of two. -/// \param arg number to modify -/// \param exp power of two to multiply with -/// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbn(T arg, -// int exp) { return functions::scalbln(arg, exp); -//} -inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); } -inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); } - -/// Multiply by power of two. -/// \param arg number to modify -/// \param exp power of two to multiply with -/// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbln(T -// arg, -// long exp) { return functions::scalbln(arg, -// exp); -//} -inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); } -inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); } - -/// Extract exponent. -/// \param arg number to query -/// \return floating point exponent -/// \retval FP_ILOGB0 for zero -/// \retval FP_ILOGBNAN for NaN -/// \retval MAX_INT for infinity -// template typename enable::type ilogb(T arg) { -// return functions::ilogb(arg); } -inline int ilogb(half arg) { return functions::ilogb(arg); } -inline int ilogb(expr arg) { return functions::ilogb(arg); } - -/// Extract exponent. -/// \param arg number to query -/// \return floating point exponent -// template typename enable::type logb(T arg) { -// return functions::logb(arg); } -inline half logb(half arg) { return functions::logb(arg); } -inline half logb(expr arg) { return functions::logb(arg); } - -/// Next representable value. -/// \param from value to compute next representable value for -/// \param to direction towards which to compute next value -/// \return next representable value after \a from in direction towards \a to -// template typename enable::type -// nextafter(T from, U to) { return -// functions::nextafter(from, to); } -inline half nextafter(half from, half to) { - return functions::nextafter(from, to); -} -inline half nextafter(half from, expr to) { - return functions::nextafter(from, to); -} -inline half nextafter(expr from, half to) { - return functions::nextafter(from, to); -} -inline half nextafter(expr from, expr to) { - return functions::nextafter(from, to); -} - -/// Next representable value. -/// \param from value to compute next representable value for -/// \param to direction towards which to compute next value -/// \return next representable value after \a from in direction towards \a to -// template typename enable::type nexttoward(T -// from, long double to) { return -// functions::nexttoward(from, to); } -inline half nexttoward(half from, long double to) { - return functions::nexttoward(from, to); -} -inline half nexttoward(expr from, long double to) { - return functions::nexttoward(from, to); -} - -/// Take sign. -/// \param x value to change sign for -/// \param y value to take sign from -/// \return value equal to \a x in magnitude and to \a y in sign -// template typename enable::type -// copysign(T x, U y) { return -// functions::copysign(x, y); } -inline half copysign(half x, half y) { return functions::copysign(x, y); } -inline half copysign(half x, expr y) { return functions::copysign(x, y); } -inline half copysign(expr x, half y) { return functions::copysign(x, y); } -inline half copysign(expr x, expr y) { return functions::copysign(x, y); } - -/// \} -/// \name Floating point classification -/// \{ - -/// Classify floating point value. -/// \param arg number to classify -/// \retval FP_ZERO for positive and negative zero -/// \retval FP_SUBNORMAL for subnormal numbers -/// \retval FP_INFINITY for positive and negative infinity -/// \retval FP_NAN for NaNs -/// \retval FP_NORMAL for all other (normal) values -// template typename enable::type fpclassify(T -// arg) { return functions::fpclassify(arg); } -inline int fpclassify(half arg) { return functions::fpclassify(arg); } -inline int fpclassify(expr arg) { return functions::fpclassify(arg); } - -/// Check if finite number. -/// \param arg number to check -/// \retval true if neither infinity nor NaN -/// \retval false else -// template typename enable::type isfinite(T -// arg) -//{ return functions::isfinite(arg); } -inline bool isfinite(half arg) { return functions::isfinite(arg); } -inline bool isfinite(expr arg) { return functions::isfinite(arg); } - -/// Check for infinity. -/// \param arg number to check -/// \retval true for positive or negative infinity -/// \retval false else -// template typename enable::type isinf(T arg) -//{ -// return functions::isinf(arg); } -inline bool isinf(half arg) { return functions::isinf(arg); } -inline bool isinf(expr arg) { return functions::isinf(arg); } - -/// Check for NaN. -/// \param arg number to check -/// \retval true for NaNs -/// \retval false else -// template typename enable::type isnan(T arg) -//{ -// return functions::isnan(arg); } -inline bool isnan(half arg) { return functions::isnan(arg); } -inline bool isnan(expr arg) { return functions::isnan(arg); } - -/// Check if normal number. -/// \param arg number to check -/// \retval true if normal number -/// \retval false if either subnormal, zero, infinity or NaN -// template typename enable::type isnormal(T -// arg) -//{ return functions::isnormal(arg); } -inline bool isnormal(half arg) { return functions::isnormal(arg); } -inline bool isnormal(expr arg) { return functions::isnormal(arg); } - -/// Check sign. -/// \param arg number to check -/// \retval true for negative number -/// \retval false for positive number -// template typename enable::type signbit(T -// arg) -//{ return functions::signbit(arg); } -inline bool signbit(half arg) { return functions::signbit(arg); } -inline bool signbit(expr arg) { return functions::signbit(arg); } - -/// \} -/// \name Comparison -/// \{ - -/// Comparison for greater than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater than \a y -/// \retval false else -// template typename enable::type -// isgreater(T x, U y) { return -// functions::isgreater(x, y); } -inline bool isgreater(half x, half y) { return functions::isgreater(x, y); } -inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); } -inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); } -inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); } - -/// Comparison for greater equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x greater equal \a y -/// \retval false else -// template typename enable::type -// isgreaterequal(T x, U y) { return -// functions::isgreaterequal(x, y); } -inline bool isgreaterequal(half x, half y) { - return functions::isgreaterequal(x, y); -} -inline bool isgreaterequal(half x, expr y) { - return functions::isgreaterequal(x, y); -} -inline bool isgreaterequal(expr x, half y) { - return functions::isgreaterequal(x, y); -} -inline bool isgreaterequal(expr x, expr y) { - return functions::isgreaterequal(x, y); -} - -/// Comparison for less than. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less than \a y -/// \retval false else -// template typename enable::type -// isless(T x, U y) { return functions::isless(x, -// y); -//} -inline bool isless(half x, half y) { return functions::isless(x, y); } -inline bool isless(half x, expr y) { return functions::isless(x, y); } -inline bool isless(expr x, half y) { return functions::isless(x, y); } -inline bool isless(expr x, expr y) { return functions::isless(x, y); } - -/// Comparison for less equal. -/// \param x first operand -/// \param y second operand -/// \retval true if \a x less equal \a y -/// \retval false else -// template typename enable::type -// islessequal(T x, U y) { return -// functions::islessequal(x, y); } -inline bool islessequal(half x, half y) { return functions::islessequal(x, y); } -inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); } -inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); } -inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); } - -/// Comarison for less or greater. -/// \param x first operand -/// \param y second operand -/// \retval true if either less or greater -/// \retval false else -// template typename enable::type -// islessgreater(T x, U y) { return -// functions::islessgreater(x, y); } -inline bool islessgreater(half x, half y) { - return functions::islessgreater(x, y); -} -inline bool islessgreater(half x, expr y) { - return functions::islessgreater(x, y); -} -inline bool islessgreater(expr x, half y) { - return functions::islessgreater(x, y); -} -inline bool islessgreater(expr x, expr y) { - return functions::islessgreater(x, y); -} - -/// Check if unordered. -/// \param x first operand -/// \param y second operand -/// \retval true if unordered (one or two NaN operands) -/// \retval false else -// template typename enable::type -// isunordered(T x, U y) { return -// functions::isunordered(x, y); } -inline bool isunordered(half x, half y) { return functions::isunordered(x, y); } -inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); } -inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); } -inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); } - -/// \name Casting -/// \{ - -/// Cast to or from half-precision floating point number. -/// This casts between [half](\ref half_float::half) and any built-in arithmetic -/// type. The values are converted -/// directly using the given rounding mode, without any roundtrip over `float` -/// that a `static_cast` would otherwise do. -/// It uses the default rounding mode. -/// -/// Using this cast with neither of the two types being a [half](\ref -/// half_float::half) or with any of the two types -/// not being a built-in arithmetic type (apart from [half](\ref -/// half_float::half), of course) results in a compiler -/// error and casting between [half](\ref half_float::half)s is just a no-op. -/// \tparam T destination type (half or built-in arithmetic type) -/// \tparam U source type (half or built-in arithmetic type) -/// \param arg value to cast -/// \return \a arg converted to destination type -template T half_cast(U arg) { - return half_caster::cast(arg); -} - -/// Cast to or from half-precision floating point number. -/// This casts between [half](\ref half_float::half) and any built-in arithmetic -/// type. The values are converted -/// directly using the given rounding mode, without any roundtrip over `float` -/// that a `static_cast` would otherwise do. -/// -/// Using this cast with neither of the two types being a [half](\ref -/// half_float::half) or with any of the two types -/// not being a built-in arithmetic type (apart from [half](\ref -/// half_float::half), of course) results in a compiler -/// error and casting between [half](\ref half_float::half)s is just a no-op. -/// \tparam T destination type (half or built-in arithmetic type) -/// \tparam R rounding mode to use. -/// \tparam U source type (half or built-in arithmetic type) -/// \param arg value to cast -/// \return \a arg converted to destination type -template T half_cast(U arg) { - return half_caster::cast(arg); -} -/// \} -} // namespace detail - -using detail::operator==; -using detail::operator!=; -using detail::operator<; -using detail::operator>; -using detail::operator<=; -using detail::operator>=; -using detail::operator+; -using detail::operator-; -using detail::operator*; -using detail::operator/; -using detail::operator<<; -using detail::operator>>; - -using detail::abs; -using detail::acos; -using detail::acosh; -using detail::asin; -using detail::asinh; -using detail::atan; -using detail::atan2; -using detail::atanh; -using detail::cbrt; -using detail::ceil; -using detail::cos; -using detail::cosh; -using detail::erf; -using detail::erfc; -using detail::exp; -using detail::exp2; -using detail::expm1; -using detail::fabs; -using detail::fdim; -using detail::floor; -using detail::fma; -using detail::fmax; -using detail::fmin; -using detail::fmod; -using detail::hypot; -using detail::lgamma; -using detail::log; -using detail::log10; -using detail::log1p; -using detail::log2; -using detail::lrint; -using detail::lround; -using detail::nanh; -using detail::nearbyint; -using detail::pow; -using detail::remainder; -using detail::remquo; -using detail::rint; -using detail::round; -using detail::sin; -using detail::sinh; -using detail::sqrt; -using detail::tan; -using detail::tanh; -using detail::tgamma; -using detail::trunc; -#if HALF_ENABLE_CPP11_LONG_LONG -using detail::llrint; -using detail::llround; -#endif -using detail::copysign; -using detail::fpclassify; -using detail::frexp; -using detail::ilogb; -using detail::isfinite; -using detail::isgreater; -using detail::isgreaterequal; -using detail::isinf; -using detail::isless; -using detail::islessequal; -using detail::islessgreater; -using detail::isnan; -using detail::isnormal; -using detail::isunordered; -using detail::ldexp; -using detail::logb; -using detail::modf; -using detail::nextafter; -using detail::nexttoward; -using detail::scalbln; -using detail::scalbn; -using detail::signbit; - -using detail::half_cast; -} // namespace half_float - -/// Extensions to the C++ standard library. -namespace std { -/// Numeric limits for half-precision floats. -/// Because of the underlying single-precision implementation of many -/// operations, it inherits some properties from -/// `std::numeric_limits`. -template <> -class numeric_limits : public numeric_limits { - public: - /// Supports signed values. - static HALF_CONSTEXPR_CONST bool is_signed = true; - - /// Is not exact. - static HALF_CONSTEXPR_CONST bool is_exact = false; - - /// Doesn't provide modulo arithmetic. - static HALF_CONSTEXPR_CONST bool is_modulo = false; - - /// IEEE conformant. - static HALF_CONSTEXPR_CONST bool is_iec559 = true; - - /// Supports infinity. - static HALF_CONSTEXPR_CONST bool has_infinity = true; - - /// Supports quiet NaNs. - static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; - - /// Supports subnormal values. - static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; - - /// Rounding mode. - /// Due to the mix of internal single-precision computations (using the - /// rounding mode of the underlying - /// single-precision implementation) with the rounding mode of the - /// single-to-half conversions, the actual rounding - /// mode might be `std::round_indeterminate` if the default half-precision - /// rounding mode doesn't match the - /// single-precision rounding mode. - static HALF_CONSTEXPR_CONST float_round_style round_style = - (std::numeric_limits::round_style == half_float::half::round_style) - ? half_float::half::round_style - : round_indeterminate; - - /// Significant digits. - static HALF_CONSTEXPR_CONST int digits = 11; - - /// Significant decimal digits. - static HALF_CONSTEXPR_CONST int digits10 = 3; - - /// Required decimal digits to represent all possible values. - static HALF_CONSTEXPR_CONST int max_digits10 = 5; - - /// Number base. - static HALF_CONSTEXPR_CONST int radix = 2; - - /// One more than smallest exponent. - static HALF_CONSTEXPR_CONST int min_exponent = -13; - - /// Smallest normalized representable power of 10. - static HALF_CONSTEXPR_CONST int min_exponent10 = -4; - - /// One more than largest exponent - static HALF_CONSTEXPR_CONST int max_exponent = 16; - - /// Largest finitely representable power of 10. - static HALF_CONSTEXPR_CONST int max_exponent10 = 4; - - /// Smallest positive normal value. - static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x0400); - } - - /// Smallest finite value. - static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0xFBFF); - } - - /// Largest finite value. - static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x7BFF); - } - - /// Difference between one and next representable value. - static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x1400); - } - - /// Maximum rounding error. - static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, - (round_style == std::round_to_nearest) ? 0x3800 - : 0x3C00); - } - - /// Positive infinity. - static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x7C00); - } - - /// Quiet NaN. - static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x7FFF); - } - - /// Signalling NaN. - static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x7DFF); - } - - /// Smallest positive subnormal value. - static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { - return half_float::half(half_float::detail::binary, 0x0001); - } -}; - -#if HALF_ENABLE_CPP11_HASH -/// Hash function for half-precision floats. -/// This is only defined if C++11 `std::hash` is supported and enabled. -template <> -struct hash //: unary_function -{ - /// Type of function argument. - typedef half_float::half argument_type; - - /// Function return type. - typedef size_t result_type; - - /// Compute hash function. - /// \param arg half to hash - /// \return hash value - result_type operator()(argument_type arg) const { - return hash()(static_cast(arg.data_) & - -(arg.data_ != 0x8000)); - } -}; -#endif -} // namespace std - -#undef HALF_CONSTEXPR -#undef HALF_CONSTEXPR_CONST -#undef HALF_NOEXCEPT -#undef HALF_NOTHROW -#ifdef HALF_POP_WARNINGS -#pragma warning(pop) -#undef HALF_POP_WARNINGS -#endif - -#endif diff --git a/csrcs/fastdeploy/backends/tensorrt/common/logger.cpp b/csrcs/fastdeploy/backends/tensorrt/common/logger.cpp deleted file mode 100644 index 1e1671558..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/logger.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "logger.h" -#include "ErrorRecorder.h" -#include "logging.h" - -SampleErrorRecorder gRecorder; -namespace sample { -Logger gLogger{Logger::Severity::kINFO}; -LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; -LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; -LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; -LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; -LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; - -void setReportableSeverity(Logger::Severity severity) { - gLogger.setReportableSeverity(severity); - gLogVerbose.setReportableSeverity(severity); - gLogInfo.setReportableSeverity(severity); - gLogWarning.setReportableSeverity(severity); - gLogError.setReportableSeverity(severity); - gLogFatal.setReportableSeverity(severity); -} -} // namespace sample diff --git a/csrcs/fastdeploy/backends/tensorrt/common/logger.h b/csrcs/fastdeploy/backends/tensorrt/common/logger.h deleted file mode 100644 index ab642744e..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/logger.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LOGGER_H -#define LOGGER_H - -#include "logging.h" - -class SampleErrorRecorder; -extern SampleErrorRecorder gRecorder; -namespace sample { -extern Logger gLogger; -extern LogStreamConsumer gLogVerbose; -extern LogStreamConsumer gLogInfo; -extern LogStreamConsumer gLogWarning; -extern LogStreamConsumer gLogError; -extern LogStreamConsumer gLogFatal; - -void setReportableSeverity(Logger::Severity severity); -} // namespace sample - -#endif // LOGGER_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/logging.h b/csrcs/fastdeploy/backends/tensorrt/common/logging.h deleted file mode 100644 index abcb6b406..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/logging.h +++ /dev/null @@ -1,573 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TENSORRT_LOGGING_H -#define TENSORRT_LOGGING_H - -#include "NvInferRuntimeCommon.h" -#include "sampleOptions.h" -#include -#include -#include -#include -#include -#include -#include -#include - -namespace sample { - -using Severity = nvinfer1::ILogger::Severity; - -class LogStreamConsumerBuffer : public std::stringbuf { - public: - LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, - bool shouldLog) - : mOutput(stream), mPrefix(prefix), mShouldLog(shouldLog) {} - - LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept - : mOutput(other.mOutput), mPrefix(other.mPrefix), - mShouldLog(other.mShouldLog) {} - LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete; - LogStreamConsumerBuffer() = delete; - LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete; - LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete; - - ~LogStreamConsumerBuffer() override { - // std::streambuf::pbase() gives a pointer to the beginning of the buffered - // part of the output sequence - // std::streambuf::pptr() gives a pointer to the current position of the - // output sequence - // if the pointer to the beginning is not equal to the pointer to the - // current position, - // call putOutput() to log the output to the stream - if (pbase() != pptr()) { - putOutput(); - } - } - - //! - //! synchronizes the stream buffer and returns 0 on success - //! synchronizing the stream buffer consists of inserting the buffer contents - //! into the stream, - //! resetting the buffer and flushing the stream - //! - int32_t sync() override { - putOutput(); - return 0; - } - - void putOutput() { - if (mShouldLog) { - // prepend timestamp - std::time_t timestamp = std::time(nullptr); - tm* tm_local = std::localtime(×tamp); - mOutput << "["; - mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon - << "/"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; - mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year - << "-"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; - mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; - // std::stringbuf::str() gets the string contents of the buffer - // insert the buffer contents pre-appended by the appropriate prefix into - // the stream - mOutput << mPrefix << str(); - } - // set the buffer to empty - str(""); - // flush the stream - mOutput.flush(); - } - - void setShouldLog(bool shouldLog) { mShouldLog = shouldLog; } - - private: - std::ostream& mOutput; - std::string mPrefix; - bool mShouldLog{}; -}; // class LogStreamConsumerBuffer - -//! -//! \class LogStreamConsumerBase -//! \brief Convenience object used to initialize LogStreamConsumerBuffer before -//! std::ostream in LogStreamConsumer -//! -class LogStreamConsumerBase { - public: - LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, - bool shouldLog) - : mBuffer(stream, prefix, shouldLog) {} - - protected: - std::mutex mLogMutex; - LogStreamConsumerBuffer mBuffer; -}; // class LogStreamConsumerBase - -//! -//! \class LogStreamConsumer -//! \brief Convenience object used to facilitate use of C++ stream syntax when -//! logging messages. -//! Order of base classes is LogStreamConsumerBase and then std::ostream. -//! This is because the LogStreamConsumerBase class is used to initialize the -//! LogStreamConsumerBuffer member field -//! in LogStreamConsumer and then the address of the buffer is passed to -//! std::ostream. -//! This is necessary to prevent the address of an uninitialized buffer from -//! being passed to std::ostream. -//! Please do not change the order of the parent classes. -//! -class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream { - public: - //! - //! \brief Creates a LogStreamConsumer which logs messages with level - //! severity. - //! Reportable severity determines if the messages are severe enough to be - //! logged. - //! - LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, - nvinfer1::ILogger::Severity severity) - : LogStreamConsumerBase(severityOstream(severity), - severityPrefix(severity), - severity <= reportableSeverity), - std::ostream(&mBuffer) // links the stream buffer with the stream - , - mShouldLog(severity <= reportableSeverity), mSeverity(severity) {} - - LogStreamConsumer(LogStreamConsumer&& other) noexcept - : LogStreamConsumerBase(severityOstream(other.mSeverity), - severityPrefix(other.mSeverity), - other.mShouldLog), - std::ostream(&mBuffer) // links the stream buffer with the stream - , - mShouldLog(other.mShouldLog), mSeverity(other.mSeverity) {} - LogStreamConsumer(const LogStreamConsumer& other) = delete; - LogStreamConsumer() = delete; - ~LogStreamConsumer() = default; - LogStreamConsumer& operator=(const LogStreamConsumer&) = delete; - LogStreamConsumer& operator=(LogStreamConsumer&&) = delete; - - void setReportableSeverity(Severity reportableSeverity) { - mShouldLog = mSeverity <= reportableSeverity; - mBuffer.setShouldLog(mShouldLog); - } - - std::mutex& getMutex() { return mLogMutex; } - - bool getShouldLog() const { return mShouldLog; } - - private: - static std::ostream& severityOstream(Severity severity) { - return severity >= Severity::kINFO ? std::cout : std::cerr; - } - - static std::string severityPrefix(Severity severity) { - switch (severity) { - case Severity::kINTERNAL_ERROR: - return "[F] "; - case Severity::kERROR: - return "[E] "; - case Severity::kWARNING: - return "[W] "; - case Severity::kINFO: - return "[I] "; - case Severity::kVERBOSE: - return "[V] "; - default: - assert(0); - return ""; - } - } - - bool mShouldLog; - Severity mSeverity; -}; // class LogStreamConsumer - -template -LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) { - if (logger.getShouldLog()) { - std::lock_guard guard(logger.getMutex()); - auto& os = static_cast(logger); - os << obj; - } - return logger; -} - -//! -//! Special handling std::endl -//! -inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, - std::ostream& (*f)(std::ostream&)) { - if (logger.getShouldLog()) { - std::lock_guard guard(logger.getMutex()); - auto& os = static_cast(logger); - os << f; - } - return logger; -} - -inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, - const nvinfer1::Dims& dims) { - if (logger.getShouldLog()) { - std::lock_guard guard(logger.getMutex()); - auto& os = static_cast(logger); - for (int32_t i = 0; i < dims.nbDims; ++i) { - os << (i ? "x" : "") << dims.d[i]; - } - } - return logger; -} - -//! -//! \class Logger -//! -//! \brief Class which manages logging of TensorRT tools and samples -//! -//! \details This class provides a common interface for TensorRT tools and -//! samples to log information to the console, -//! and supports logging two types of messages: -//! -//! - Debugging messages with an associated severity (info, warning, error, or -//! internal error/fatal) -//! - Test pass/fail messages -//! -//! The advantage of having all samples use this class for logging as opposed to -//! emitting directly to stdout/stderr is -//! that the logic for controlling the verbosity and formatting of sample output -//! is centralized in one location. -//! -//! In the future, this class could be extended to support dumping test results -//! to a file in some standard format -//! (for example, JUnit XML), and providing additional metadata (e.g. timing the -//! duration of a test run). -//! -//! TODO: For backwards compatibility with existing samples, this class inherits -//! directly from the nvinfer1::ILogger -//! interface, which is problematic since there isn't a clean separation between -//! messages coming from the TensorRT -//! library and messages coming from the sample. -//! -//! In the future (once all samples are updated to use Logger::getTRTLogger() to -//! access the ILogger) we can refactor the -//! class to eliminate the inheritance and instead make the nvinfer1::ILogger -//! implementation a member of the Logger -//! object. -//! -class Logger : public nvinfer1::ILogger { - public: - explicit Logger(Severity severity = Severity::kWARNING) - : mReportableSeverity(severity) {} - - //! - //! \enum TestResult - //! \brief Represents the state of a given test - //! - enum class TestResult { - kRUNNING, //!< The test is running - kPASSED, //!< The test passed - kFAILED, //!< The test failed - kWAIVED //!< The test was waived - }; - - //! - //! \brief Forward-compatible method for retrieving the nvinfer::ILogger - //! associated with this Logger - //! \return The nvinfer1::ILogger associated with this Logger - //! - //! TODO Once all samples are updated to use this method to register the - //! logger with TensorRT, - //! we can eliminate the inheritance of Logger from ILogger - //! - nvinfer1::ILogger& getTRTLogger() noexcept { return *this; } - - //! - //! \brief Implementation of the nvinfer1::ILogger::log() virtual method - //! - //! Note samples should not be calling this function directly; it will - //! eventually go away once we eliminate the - //! inheritance from nvinfer1::ILogger - //! - void log(Severity severity, const char* msg) noexcept override { - LogStreamConsumer(mReportableSeverity, severity) - << "[TRT] " << std::string(msg) << std::endl; - } - - //! - //! \brief Method for controlling the verbosity of logging output - //! - //! \param severity The logger will only emit messages that have severity of - //! this level or higher. - //! - void setReportableSeverity(Severity severity) noexcept { - mReportableSeverity = severity; - } - - //! - //! \brief Opaque handle that holds logging information for a particular test - //! - //! This object is an opaque handle to information used by the Logger to print - //! test results. - //! The sample must call Logger::defineTest() in order to obtain a TestAtom - //! that can be used - //! with Logger::reportTest{Start,End}(). - //! - class TestAtom { - public: - TestAtom(TestAtom&&) = default; - - private: - friend class Logger; - - TestAtom(bool started, const std::string& name, const std::string& cmdline) - : mStarted(started), mName(name), mCmdline(cmdline) {} - - bool mStarted; - std::string mName; - std::string mCmdline; - }; - - //! - //! \brief Define a test for logging - //! - //! \param[in] name The name of the test. This should be a string starting - //! with - //! "TensorRT" and containing dot-separated strings - //! containing - //! the characters [A-Za-z0-9_]. - //! For example, "TensorRT.sample_googlenet" - //! \param[in] cmdline The command line used to reproduce the test - // - //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - //! - static TestAtom defineTest(const std::string& name, - const std::string& cmdline) { - return TestAtom(false, name, cmdline); - } - - //! - //! \brief A convenience overloaded version of defineTest() that accepts an - //! array of command-line arguments - //! as input - //! - //! \param[in] name The name of the test - //! \param[in] argc The number of command-line arguments - //! \param[in] argv The array of command-line arguments (given as C strings) - //! - //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - //! - static TestAtom defineTest(const std::string& name, int32_t argc, - char const* const* argv) { - // Append TensorRT version as info - const std::string vname = - name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]"; - auto cmdline = genCmdlineString(argc, argv); - return defineTest(vname, cmdline); - } - - //! - //! \brief Report that a test has started. - //! - //! \pre reportTestStart() has not been called yet for the given testAtom - //! - //! \param[in] testAtom The handle to the test that has started - //! - static void reportTestStart(TestAtom& testAtom) { - reportTestResult(testAtom, TestResult::kRUNNING); - assert(!testAtom.mStarted); - testAtom.mStarted = true; - } - - //! - //! \brief Report that a test has ended. - //! - //! \pre reportTestStart() has been called for the given testAtom - //! - //! \param[in] testAtom The handle to the test that has ended - //! \param[in] result The result of the test. Should be one of - //! TestResult::kPASSED, - //! TestResult::kFAILED, TestResult::kWAIVED - //! - static void reportTestEnd(TestAtom const& testAtom, TestResult result) { - assert(result != TestResult::kRUNNING); - assert(testAtom.mStarted); - reportTestResult(testAtom, result); - } - - static int32_t reportPass(TestAtom const& testAtom) { - reportTestEnd(testAtom, TestResult::kPASSED); - return EXIT_SUCCESS; - } - - static int32_t reportFail(TestAtom const& testAtom) { - reportTestEnd(testAtom, TestResult::kFAILED); - return EXIT_FAILURE; - } - - static int32_t reportWaive(TestAtom const& testAtom) { - reportTestEnd(testAtom, TestResult::kWAIVED); - return EXIT_SUCCESS; - } - - static int32_t reportTest(TestAtom const& testAtom, bool pass) { - return pass ? reportPass(testAtom) : reportFail(testAtom); - } - - Severity getReportableSeverity() const { return mReportableSeverity; } - - private: - //! - //! \brief returns an appropriate string for prefixing a log message with the - //! given severity - //! - static const char* severityPrefix(Severity severity) { - switch (severity) { - case Severity::kINTERNAL_ERROR: - return "[F] "; - case Severity::kERROR: - return "[E] "; - case Severity::kWARNING: - return "[W] "; - case Severity::kINFO: - return "[I] "; - case Severity::kVERBOSE: - return "[V] "; - default: - assert(0); - return ""; - } - } - - //! - //! \brief returns an appropriate string for prefixing a test result message - //! with the given result - //! - static const char* testResultString(TestResult result) { - switch (result) { - case TestResult::kRUNNING: - return "RUNNING"; - case TestResult::kPASSED: - return "PASSED"; - case TestResult::kFAILED: - return "FAILED"; - case TestResult::kWAIVED: - return "WAIVED"; - default: - assert(0); - return ""; - } - } - - //! - //! \brief returns an appropriate output stream (cout or cerr) to use with the - //! given severity - //! - static std::ostream& severityOstream(Severity severity) { - return severity >= Severity::kINFO ? std::cout : std::cerr; - } - - //! - //! \brief method that implements logging test results - //! - static void reportTestResult(TestAtom const& testAtom, TestResult result) { - severityOstream(Severity::kINFO) - << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " - << testAtom.mCmdline << std::endl; - } - - //! - //! \brief generate a command line string from the given (argc, argv) values - //! - static std::string genCmdlineString(int32_t argc, char const* const* argv) { - std::stringstream ss; - for (int32_t i = 0; i < argc; i++) { - if (i > 0) { - ss << " "; - } - ss << argv[i]; - } - return ss.str(); - } - - Severity mReportableSeverity; -}; // class Logger - -namespace { -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages -//! of severity kVERBOSE -//! -//! Example usage: -//! -//! LOG_VERBOSE(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages -//! of severity kINFO -//! -//! Example usage: -//! -//! LOG_INFO(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_INFO(const Logger& logger) { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages -//! of severity kWARNING -//! -//! Example usage: -//! -//! LOG_WARN(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_WARN(const Logger& logger) { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages -//! of severity kERROR -//! -//! Example usage: -//! -//! LOG_ERROR(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_ERROR(const Logger& logger) { - return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); -} - -//! -//! \brief produces a LogStreamConsumer object that can be used to log messages -//! of severity kINTERNAL_ERROR -//! ("fatal" severity) -//! -//! Example usage: -//! -//! LOG_FATAL(logger) << "hello world" << std::endl; -//! -inline LogStreamConsumer LOG_FATAL(const Logger& logger) { - return LogStreamConsumer(logger.getReportableSeverity(), - Severity::kINTERNAL_ERROR); -} -} // anonymous namespace -} // namespace sample -#endif // TENSORRT_LOGGING_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h b/csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h deleted file mode 100644 index 8569ca01c..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/parserOnnxConfig.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PARSER_ONNX_CONFIG_H -#define PARSER_ONNX_CONFIG_H - -#include -#include -#include - -#include "NvInfer.h" -#include "NvOnnxConfig.h" -#include "NvOnnxParser.h" - -#define ONNX_DEBUG 1 - -/** - * \class ParserOnnxConfig - * \brief Configuration Manager Class Concrete Implementation - * - * \note: - * - */ - -using namespace std; - -class ParserOnnxConfig : public nvonnxparser::IOnnxConfig { - protected: - string mModelFilename{}; - string mTextFilename{}; - string mFullTextFilename{}; - nvinfer1::DataType mModelDtype; - nvonnxparser::IOnnxConfig::Verbosity mVerbosity; - bool mPrintLayercInfo; - - public: - ParserOnnxConfig() - : mModelDtype(nvinfer1::DataType::kFLOAT), - mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)), - mPrintLayercInfo(false) { -#ifdef ONNX_DEBUG - if (isDebug()) { - std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; - } -#endif - } - - protected: - ~ParserOnnxConfig() { -#ifdef ONNX_DEBUG - if (isDebug()) { - std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; - } -#endif - } - - public: - virtual void setModelDtype(const nvinfer1::DataType modelDtype) noexcept { - mModelDtype = modelDtype; - } - - virtual nvinfer1::DataType getModelDtype() const noexcept { - return mModelDtype; - } - - virtual const char* getModelFileName() const noexcept { - return mModelFilename.c_str(); - } - virtual void setModelFileName(const char* onnxFilename) noexcept { - mModelFilename = string(onnxFilename); - } - virtual nvonnxparser::IOnnxConfig::Verbosity - getVerbosityLevel() const noexcept { - return mVerbosity; - } - virtual void addVerbosity() noexcept { ++mVerbosity; } - virtual void reduceVerbosity() noexcept { --mVerbosity; } - virtual void - setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept { - mVerbosity = verbosity; - } - - virtual const char* getTextFileName() const noexcept { - return mTextFilename.c_str(); - } - virtual void setTextFileName(const char* textFilename) noexcept { - mTextFilename = string(textFilename); - } - virtual const char* getFullTextFileName() const noexcept { - return mFullTextFilename.c_str(); - } - virtual void setFullTextFileName(const char* fullTextFilename) noexcept { - mFullTextFilename = string(fullTextFilename); - } - virtual bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; } - virtual void setPrintLayerInfo(bool src) noexcept { - mPrintLayercInfo = src; - } //!< get the boolean variable corresponding to the Layer Info, see - //! getPrintLayerInfo() - - virtual bool isDebug() const noexcept { -#if ONNX_DEBUG - return (std::getenv("ONNX_DEBUG") ? true : false); -#else - return false; -#endif - } - - virtual void destroy() noexcept { delete this; } - -}; // class ParserOnnxConfig - -#endif diff --git a/csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h b/csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h deleted file mode 100644 index 1aa92ad22..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/safeCommon.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TENSORRT_SAFE_COMMON_H -#define TENSORRT_SAFE_COMMON_H - -#include "NvInferRuntimeCommon.h" -#include -#include -#include -#include -#include - -#define CHECK(status) \ - do { \ - auto ret = (status); \ - if (ret != 0) { \ - std::cerr << "Cuda failure: " << ret << std::endl; \ - abort(); \ - } \ - } while (0) - -namespace samplesCommon { -template inline std::shared_ptr infer_object(T* obj) { - if (!obj) { - throw std::runtime_error("Failed to create object"); - } - return std::shared_ptr(obj); -} - -inline uint32_t elementSize(nvinfer1::DataType t) { - switch (t) { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: - return 4; - case nvinfer1::DataType::kHALF: - return 2; - case nvinfer1::DataType::kINT8: - return 1; - case nvinfer1::DataType::kBOOL: - return 1; - } - return 0; -} - -template inline A divUp(A x, B n) { - return (x + n - 1) / n; -} - -} // namespace samplesCommon - -#endif // TENSORRT_SAFE_COMMON_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h deleted file mode 100644 index a097f4dbe..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleConfig.h +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SampleConfig_H -#define SampleConfig_H - -#include -#include -#include - -#include "NvInfer.h" -#include "NvOnnxConfig.h" -class SampleConfig : public nvonnxparser::IOnnxConfig { - public: - enum class InputDataFormat : int { kASCII = 0, kPPM = 1 }; - - private: - std::string mModelFilename; - std::string mEngineFilename; - std::string mTextFilename; - std::string mFullTextFilename; - std::string mImageFilename; - std::string mReferenceFilename; - std::string mOutputFilename; - std::string mCalibrationFilename; - std::string mTimingCacheFilename; - int64_t mLabel{-1}; - int64_t mMaxBatchSize{32}; - int64_t mCalibBatchSize{0}; - int64_t mMaxNCalibBatch{0}; - int64_t mFirstCalibBatch{0}; - int64_t mUseDLACore{-1}; - nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT}; - bool mTF32{true}; - Verbosity mVerbosity{static_cast(nvinfer1::ILogger::Severity::kWARNING)}; - bool mPrintLayercInfo{false}; - bool mDebugBuilder{false}; - InputDataFormat mInputDataFormat{InputDataFormat::kASCII}; - uint64_t mTopK{0}; - float mFailurePercentage{-1.0f}; - float mTolerance{0.0f}; - float mAbsTolerance{1e-5f}; - - public: - SampleConfig() { -#ifdef ONNX_DEBUG - if (isDebug()) { - std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl; - } -#endif - } - - protected: - ~SampleConfig() { -#ifdef ONNX_DEBUG - if (isDebug()) { - std::cout << "SampleConfig::dtor(): " << this << std::endl; - } -#endif - } - - public: - void setModelDtype(const nvinfer1::DataType mdt) noexcept { - mModelDtype = mdt; - } - - nvinfer1::DataType getModelDtype() const noexcept { return mModelDtype; } - - bool getTF32() const noexcept { return mTF32; } - - void setTF32(bool enabled) noexcept { mTF32 = enabled; } - - const char* getModelFileName() const noexcept { - return mModelFilename.c_str(); - } - - void setModelFileName(const char* onnxFilename) noexcept { - mModelFilename = std::string(onnxFilename); - } - Verbosity getVerbosityLevel() const noexcept { return mVerbosity; } - void addVerbosity() noexcept { ++mVerbosity; } - void reduceVerbosity() noexcept { --mVerbosity; } - virtual void setVerbosityLevel(Verbosity v) noexcept { mVerbosity = v; } - const char* getEngineFileName() const noexcept { - return mEngineFilename.c_str(); - } - void setEngineFileName(const char* engineFilename) noexcept { - mEngineFilename = std::string(engineFilename); - } - const char* getTextFileName() const noexcept { return mTextFilename.c_str(); } - void setTextFileName(const char* textFilename) noexcept { - mTextFilename = std::string(textFilename); - } - const char* getFullTextFileName() const noexcept { - return mFullTextFilename.c_str(); - } - void setFullTextFileName(const char* fullTextFilename) noexcept { - mFullTextFilename = std::string(fullTextFilename); - } - void setLabel(int64_t label) noexcept { mLabel = label; } //!< set the Label - - int64_t getLabel() const noexcept { return mLabel; } //!< get the Label - - bool getPrintLayerInfo() const noexcept { return mPrintLayercInfo; } - - void setPrintLayerInfo(bool b) noexcept { - mPrintLayercInfo = b; - } //!< get the boolean variable corresponding to the Layer Info, see - //! getPrintLayerInfo() - - void setMaxBatchSize(int64_t maxBatchSize) noexcept { - mMaxBatchSize = maxBatchSize; - } //!< set the Max Batch Size - int64_t getMaxBatchSize() const noexcept { - return mMaxBatchSize; - } //!< get the Max Batch Size - - void setCalibBatchSize(int64_t CalibBatchSize) noexcept { - mCalibBatchSize = CalibBatchSize; - } //!< set the calibration batch size - int64_t getCalibBatchSize() const noexcept { - return mCalibBatchSize; - } //!< get calibration batch size - - void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept { - mMaxNCalibBatch = MaxNCalibBatch; - } //!< set Max Number of Calibration Batches - int64_t getMaxNCalibBatch() const noexcept { - return mMaxNCalibBatch; - } //!< get the Max Number of Calibration Batches - - void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept { - mFirstCalibBatch = FirstCalibBatch; - } //!< set the first calibration batch - int64_t getFirstCalibBatch() const noexcept { - return mFirstCalibBatch; - } //!< get the first calibration batch - - void setUseDLACore(int64_t UseDLACore) noexcept { - mUseDLACore = UseDLACore; - } //!< set the DLA core to use - int64_t getUseDLACore() const noexcept { - return mUseDLACore; - } //!< get the DLA core to use - - void setDebugBuilder() noexcept { - mDebugBuilder = true; - } //!< enable the Debug info, while building the engine. - bool getDebugBuilder() const noexcept { - return mDebugBuilder; - } //!< get the boolean variable, corresponding to the debug builder - - const char* - getImageFileName() const noexcept //!< set Image file name (PPM or ASCII) - { - return mImageFilename.c_str(); - } - void setImageFileName( - const char* imageFilename) noexcept //!< get the Image file name - { - mImageFilename = std::string(imageFilename); - } - const char* getReferenceFileName() const noexcept { - return mReferenceFilename.c_str(); - } - void setReferenceFileName( - const char* referenceFilename) noexcept //!< set reference file name - { - mReferenceFilename = std::string(referenceFilename); - } - - void setInputDataFormat(InputDataFormat idt) noexcept { - mInputDataFormat = idt; - } //!< specifies expected data format of the image file (PPM or ASCII) - InputDataFormat getInputDataFormat() const noexcept { - return mInputDataFormat; - } //!< returns the expected data format of the image file. - - const char* getOutputFileName() - const noexcept //!< specifies the file to save the results - { - return mOutputFilename.c_str(); - } - void setOutputFileName( - const char* outputFilename) noexcept //!< get the output file name - { - mOutputFilename = std::string(outputFilename); - } - - const char* getCalibrationFileName() const noexcept { - return mCalibrationFilename.c_str(); - } //!< specifies the file containing the list of image files for int8 - //! calibration - void setCalibrationFileName( - const char* calibrationFilename) noexcept //!< get the int 8 calibration - //! list file name - { - mCalibrationFilename = std::string(calibrationFilename); - } - - uint64_t getTopK() const noexcept { return mTopK; } - void setTopK(uint64_t topK) noexcept { - mTopK = topK; - } //!< If this options is specified, return the K top probabilities. - - float getFailurePercentage() const noexcept { return mFailurePercentage; } - - void setFailurePercentage(float f) noexcept { mFailurePercentage = f; } - - float getAbsoluteTolerance() const noexcept { return mAbsTolerance; } - - void setAbsoluteTolerance(float a) noexcept { mAbsTolerance = a; } - - float getTolerance() const noexcept { return mTolerance; } - - void setTolerance(float t) noexcept { mTolerance = t; } - - const char* getTimingCacheFilename() const noexcept { - return mTimingCacheFilename.c_str(); - } - - void setTimingCacheFileName(const char* timingCacheFilename) noexcept { - mTimingCacheFilename = std::string(timingCacheFilename); - } - - bool isDebug() const noexcept { -#if ONNX_DEBUG - return (std::getenv("ONNX_DEBUG") ? true : false); -#else - return false; -#endif - } - - void destroy() noexcept { delete this; } - -}; // class SampleConfig - -#endif diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h deleted file mode 100644 index cdbb08019..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleDevice.h +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_DEVICE_H -#define TRT_SAMPLE_DEVICE_H - -#include -#include -#include -#include -#include - -namespace sample { - -inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr) { - if (ret != cudaSuccess) { - err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl; - abort(); - } -} - -class TrtCudaEvent; - -namespace { - -void cudaSleep(void* sleep) { - std::this_thread::sleep_for( - std::chrono::duration(*static_cast(sleep))); -} - -} // namespace - -//! -//! \class TrtCudaStream -//! \brief Managed CUDA stream -//! -class TrtCudaStream { - public: - TrtCudaStream() { cudaCheck(cudaStreamCreate(&mStream)); } - - TrtCudaStream(const TrtCudaStream&) = delete; - - TrtCudaStream& operator=(const TrtCudaStream&) = delete; - - TrtCudaStream(TrtCudaStream&&) = delete; - - TrtCudaStream& operator=(TrtCudaStream&&) = delete; - - ~TrtCudaStream() { cudaCheck(cudaStreamDestroy(mStream)); } - - cudaStream_t get() const { return mStream; } - - void synchronize() { cudaCheck(cudaStreamSynchronize(mStream)); } - - void wait(TrtCudaEvent& event); - - void sleep(float* ms) { - cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms)); - } - - private: - cudaStream_t mStream{}; -}; - -//! -//! \class TrtCudaEvent -//! \brief Managed CUDA event -//! -class TrtCudaEvent { - public: - explicit TrtCudaEvent(bool blocking = true) { - const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault; - cudaCheck(cudaEventCreateWithFlags(&mEvent, flags)); - } - - TrtCudaEvent(const TrtCudaEvent&) = delete; - - TrtCudaEvent& operator=(const TrtCudaEvent&) = delete; - - TrtCudaEvent(TrtCudaEvent&&) = delete; - - TrtCudaEvent& operator=(TrtCudaEvent&&) = delete; - - ~TrtCudaEvent() { cudaCheck(cudaEventDestroy(mEvent)); } - - cudaEvent_t get() const { return mEvent; } - - void record(const TrtCudaStream& stream) { - cudaCheck(cudaEventRecord(mEvent, stream.get())); - } - - void synchronize() { cudaCheck(cudaEventSynchronize(mEvent)); } - - // Returns time elapsed time in milliseconds - float operator-(const TrtCudaEvent& e) const { - float time{0}; - cudaCheck(cudaEventElapsedTime(&time, e.get(), get())); - return time; - } - - private: - cudaEvent_t mEvent{}; -}; - -inline void TrtCudaStream::wait(TrtCudaEvent& event) { - cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0)); -} - -//! -//! \class TrtCudaGraph -//! \brief Managed CUDA graph -//! -class TrtCudaGraph { - public: - explicit TrtCudaGraph() = default; - - TrtCudaGraph(const TrtCudaGraph&) = delete; - - TrtCudaGraph& operator=(const TrtCudaGraph&) = delete; - - TrtCudaGraph(TrtCudaGraph&&) = delete; - - TrtCudaGraph& operator=(TrtCudaGraph&&) = delete; - - ~TrtCudaGraph() { - if (mGraphExec) { - cudaGraphExecDestroy(mGraphExec); - } - } - - void beginCapture(TrtCudaStream& stream) { - cudaCheck( - cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal)); - } - - bool launch(TrtCudaStream& stream) { - return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess; - } - - void endCapture(TrtCudaStream& stream) { - cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph)); - cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0)); - cudaCheck(cudaGraphDestroy(mGraph)); - } - - void endCaptureOnError(TrtCudaStream& stream) { - // There are two possibilities why stream capture would fail: - // (1) stream is in cudaErrorStreamCaptureInvalidated state. - // (2) TRT reports a failure. - // In case (1), the returning mGraph should be nullptr. - // In case (2), the returning mGraph is not nullptr, but it should not be - // used. - const auto ret = cudaStreamEndCapture(stream.get(), &mGraph); - if (ret == cudaErrorStreamCaptureInvalidated) { - assert(mGraph == nullptr); - } else { - assert(ret == cudaSuccess); - assert(mGraph != nullptr); - cudaCheck(cudaGraphDestroy(mGraph)); - mGraph = nullptr; - } - // Clean up any CUDA error. - cudaGetLastError(); - sample::gLogWarning << "The CUDA graph capture on the stream has failed." - << std::endl; - } - - private: - cudaGraph_t mGraph{}; - cudaGraphExec_t mGraphExec{}; -}; - -//! -//! \class TrtCudaBuffer -//! \brief Managed buffer for host and device -//! -template class TrtCudaBuffer { - public: - TrtCudaBuffer() = default; - - TrtCudaBuffer(const TrtCudaBuffer&) = delete; - - TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete; - - TrtCudaBuffer(TrtCudaBuffer&& rhs) { - reset(rhs.mPtr); - rhs.mPtr = nullptr; - } - - TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) { - if (this != &rhs) { - reset(rhs.mPtr); - rhs.mPtr = nullptr; - } - return *this; - } - - ~TrtCudaBuffer() { reset(); } - - TrtCudaBuffer(size_t size) { A()(&mPtr, size); } - - void allocate(size_t size) { - reset(); - A()(&mPtr, size); - } - - void reset(void* ptr = nullptr) { - if (mPtr) { - D()(mPtr); - } - mPtr = ptr; - } - - void* get() const { return mPtr; } - - private: - void* mPtr{nullptr}; -}; - -struct DeviceAllocator { - void operator()(void** ptr, size_t size) { cudaCheck(cudaMalloc(ptr, size)); } -}; - -struct DeviceDeallocator { - void operator()(void* ptr) { cudaCheck(cudaFree(ptr)); } -}; - -struct ManagedAllocator { - void operator()(void** ptr, size_t size) { - cudaCheck(cudaMallocManaged(ptr, size)); - } -}; - -struct HostAllocator { - void operator()(void** ptr, size_t size) { - cudaCheck(cudaMallocHost(ptr, size)); - } -}; - -struct HostDeallocator { - void operator()(void* ptr) { cudaCheck(cudaFreeHost(ptr)); } -}; - -using TrtDeviceBuffer = TrtCudaBuffer; -using TrtManagedBuffer = TrtCudaBuffer; - -using TrtHostBuffer = TrtCudaBuffer; - -//! -//! \class MirroredBuffer -//! \brief Coupled host and device buffers -//! -class IMirroredBuffer { - public: - //! - //! Allocate memory for the mirrored buffer give the size - //! of the allocation. - //! - virtual void allocate(size_t size) = 0; - - //! - //! Get the pointer to the device side buffer. - //! - //! \return pointer to device memory or nullptr if uninitialized. - //! - virtual void* getDeviceBuffer() const = 0; - - //! - //! Get the pointer to the host side buffer. - //! - //! \return pointer to host memory or nullptr if uninitialized. - //! - virtual void* getHostBuffer() const = 0; - - //! - //! Copy the memory from host to device. - //! - virtual void hostToDevice(TrtCudaStream& stream) = 0; - - //! - //! Copy the memory from device to host. - //! - virtual void deviceToHost(TrtCudaStream& stream) = 0; - - //! - //! Interface to get the size of the memory - //! - //! \return the size of memory allocated. - //! - virtual size_t getSize() const = 0; - - //! - //! Virtual destructor declaraion - //! - virtual ~IMirroredBuffer() = default; - -}; // class IMirroredBuffer - -//! -//! Class to have a seperate memory buffer for discrete device and host -//! allocations. -//! -class DiscreteMirroredBuffer : public IMirroredBuffer { - public: - void allocate(size_t size) { - mSize = size; - mHostBuffer.allocate(size); - mDeviceBuffer.allocate(size); - } - - void* getDeviceBuffer() const { return mDeviceBuffer.get(); } - - void* getHostBuffer() const { return mHostBuffer.get(); } - - void hostToDevice(TrtCudaStream& stream) { - cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, - cudaMemcpyHostToDevice, stream.get())); - } - - void deviceToHost(TrtCudaStream& stream) { - cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, - cudaMemcpyDeviceToHost, stream.get())); - } - - size_t getSize() const { return mSize; } - - private: - size_t mSize{0}; - TrtHostBuffer mHostBuffer; - TrtDeviceBuffer mDeviceBuffer; -}; // class DiscreteMirroredBuffer - -//! -//! Class to have a unified memory buffer for embedded devices. -//! -class UnifiedMirroredBuffer : public IMirroredBuffer { - public: - void allocate(size_t size) { - mSize = size; - mBuffer.allocate(size); - } - - void* getDeviceBuffer() const { return mBuffer.get(); } - - void* getHostBuffer() const { return mBuffer.get(); } - - void hostToDevice(TrtCudaStream& stream) { - // Does nothing since we are using unified memory. - } - - void deviceToHost(TrtCudaStream& stream) { - // Does nothing since we are using unified memory. - } - - size_t getSize() const { return mSize; } - - private: - size_t mSize{0}; - TrtManagedBuffer mBuffer; -}; // class UnifiedMirroredBuffer - -inline void setCudaDevice(int device, std::ostream& os) { - cudaCheck(cudaSetDevice(device)); - - cudaDeviceProp properties; - cudaCheck(cudaGetDeviceProperties(&properties, device)); - - // clang-format off - os << "=== Device Information ===" << std::endl; - os << "Selected Device: " << properties.name << std::endl; - os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl; - os << "SMs: " << properties.multiProcessorCount << std::endl; - os << "Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; - os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl; - os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; - os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" - << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; - os << "Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; - // clang-format on -} - -} // namespace sample - -#endif // TRT_SAMPLE_DEVICE_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp deleted file mode 100644 index 6c1ab35b1..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.cpp +++ /dev/null @@ -1,1710 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -//#include "NvCaffeParser.h" -#include "NvInfer.h" -#include "NvOnnxParser.h" - -#include "ErrorRecorder.h" -#include "common.h" -#include "half.h" -#include "logger.h" -#include "sampleEngines.h" -#include "sampleOptions.h" -#include "sampleUtils.h" - -#if !defined(_WIN32) -#include -#endif - -using namespace nvinfer1; - -namespace sample { - -namespace { - -//struct CaffeBufferShutter { -// ~CaffeBufferShutter() { nvcaffeparser1::shutdownProtobufLibrary(); } -//}; - -std::map -readScalesFromCalibrationCache(const std::string& calibrationFile) { - std::map tensorScales; - std::ifstream cache{calibrationFile}; - if (!cache.is_open()) { - sample::gLogError << "[TRT] Can not open provided calibration cache file" - << std::endl; - return tensorScales; - } - std::string line; - while (std::getline(cache, line)) { - auto colonPos = line.find_last_of(':'); - if (colonPos != std::string::npos) { - // Scales should be stored in calibration cache as 32-bit floating numbers - // encoded as 32-bit integers - int32_t scalesAsInt = - std::stoi(line.substr(colonPos + 2, 8), nullptr, 16); - const auto tensorName = line.substr(0, colonPos); - tensorScales[tensorName] = *reinterpret_cast(&scalesAsInt); - } - } - cache.close(); - return tensorScales; -} -} // namespace - -void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, - const std::vector& inputFormats, - const std::vector& outputFormats, - const std::string& calibrationFile) { - const auto tensorScales = readScalesFromCalibrationCache(calibrationFile); - const bool broadcastInputFormats = - broadcastIOFormats(inputFormats, network.getNbInputs()); - for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i) { - int32_t formatIdx = broadcastInputFormats ? 0 : i; - if (!inputFormats.empty() && - inputFormats[formatIdx].first == DataType::kINT8) { - auto* input = network.getInput(i); - const auto calibScale = tensorScales.at(input->getName()); - input->setDynamicRange(-127 * calibScale, 127 * calibScale); - } - } - const bool broadcastOutputFormats = - broadcastIOFormats(outputFormats, network.getNbInputs()); - for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i) { - int32_t formatIdx = broadcastOutputFormats ? 0 : i; - if (!outputFormats.empty() && - outputFormats[formatIdx].first == DataType::kINT8) { - auto* output = network.getOutput(i); - const auto calibScale = tensorScales.at(output->getName()); - output->setDynamicRange(-127 * calibScale, 127 * calibScale); - } - } -} - -#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err) \ - { \ - if ((condition) == false) { \ - (err) << (msg) << std::endl; \ - return retval; \ - } \ - } - -Parser modelToNetwork(const ModelOptions& model, - nvinfer1::INetworkDefinition& network, - std::ostream& err) { - sample::gLogInfo << "Start parsing network model" << std::endl; - Parser parser; - const std::string& modelName = model.baseModel.model; - switch (model.baseModel.format) { -/* - case ModelFormat::kCAFFE: { - using namespace nvcaffeparser1; - parser.caffeParser.reset(createCaffeParser()); - CaffeBufferShutter bufferShutter; - const auto* const blobNameToTensor = parser.caffeParser->parse( - model.prototxt.c_str(), modelName.empty() ? nullptr : modelName.c_str(), - network, DataType::kFLOAT); - if (!blobNameToTensor) { - err << "Failed to parse caffe model or prototxt, tensors blob not found" - << std::endl; - parser.caffeParser.reset(); - break; - } - - for (const auto& s : model.outputs) { - if (blobNameToTensor->find(s.c_str()) == nullptr) { - err << "Could not find output blob " << s << std::endl; - parser.caffeParser.reset(); - break; - } - network.markOutput(*blobNameToTensor->find(s.c_str())); - } - break; - } -*/ - case ModelFormat::kONNX: { - using namespace nvonnxparser; - parser.onnxParser.reset( - createParser(network, sample::gLogger.getTRTLogger())); - if (!parser.onnxParser->parseFromFile( - model.baseModel.model.c_str(), - static_cast(sample::gLogger.getReportableSeverity()))) { - err << "Failed to parse onnx file" << std::endl; - parser.onnxParser.reset(); - } - break; - } - case ModelFormat::kANY: - break; - } - - sample::gLogInfo << "Finish parsing network model" << std::endl; - return parser; -} - -namespace { - -class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 { - public: - RndInt8Calibrator(int batches, std::vector& elemCount, - const std::string& cacheFile, - const nvinfer1::INetworkDefinition& network, - std::ostream& err); - - ~RndInt8Calibrator() { - for (auto& elem : mInputDeviceBuffers) { - cudaCheck(cudaFree(elem.second), mErr); - } - } - - bool getBatch(void* bindings[], const char* names[], - int nbBindings) noexcept override; - - int getBatchSize() const noexcept override { return 1; } - - const void* readCalibrationCache(size_t& length) noexcept override; - - virtual void writeCalibrationCache(const void*, size_t) noexcept override {} - - private: - int mBatches{}; - int mCurrentBatch{}; - std::string mCacheFile; - std::map mInputDeviceBuffers; - std::vector mCalibrationCache; - std::ostream& mErr; -}; - -RndInt8Calibrator::RndInt8Calibrator(int batches, - std::vector& elemCount, - const std::string& cacheFile, - const INetworkDefinition& network, - std::ostream& err) - : mBatches(batches), mCurrentBatch(0), mCacheFile(cacheFile), mErr(err) { - std::ifstream tryCache(cacheFile, std::ios::binary); - if (tryCache.good()) { - return; - } - - std::default_random_engine generator; - std::uniform_real_distribution distribution(-1.0F, 1.0F); - auto gen = [&generator, &distribution]() { return distribution(generator); }; - - for (int i = 0; i < network.getNbInputs(); i++) { - auto* input = network.getInput(i); - std::vector rnd_data(elemCount[i]); - std::generate_n(rnd_data.begin(), elemCount[i], gen); - - void* data; - cudaCheck(cudaMalloc(&data, elemCount[i] * sizeof(float)), mErr); - cudaCheck(cudaMemcpy(data, rnd_data.data(), elemCount[i] * sizeof(float), - cudaMemcpyHostToDevice), - mErr); - - mInputDeviceBuffers.insert(std::make_pair(input->getName(), data)); - } -} - -bool RndInt8Calibrator::getBatch(void* bindings[], const char* names[], - int nbBindings) noexcept { - if (mCurrentBatch >= mBatches) { - return false; - } - - for (int i = 0; i < nbBindings; ++i) { - bindings[i] = mInputDeviceBuffers[names[i]]; - } - - ++mCurrentBatch; - - return true; -} - -const void* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept { - mCalibrationCache.clear(); - std::ifstream input(mCacheFile, std::ios::binary); - input >> std::noskipws; - if (input.good()) { - std::copy(std::istream_iterator(input), std::istream_iterator(), - std::back_inserter(mCalibrationCache)); - } - - length = mCalibrationCache.size(); - return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr; -} - -bool setTensorDynamicRange(const INetworkDefinition& network, - float inRange = 2.0F, float outRange = 4.0F) { - // Ensure that all layer inputs have a dynamic range. - for (int l = 0; l < network.getNbLayers(); l++) { - auto* layer = network.getLayer(l); - for (int i = 0; i < layer->getNbInputs(); i++) { - ITensor* input{layer->getInput(i)}; - // Optional inputs are nullptr here and are from RNN layers. - if (input && !input->dynamicRangeIsSet()) { - // Concat should propagate dynamic range from outputs to inputs to avoid - // Re-quantization during the concatenation - auto dynRange = (layer->getType() == LayerType::kCONCATENATION) - ? outRange - : inRange; - if (!input->setDynamicRange(-dynRange, dynRange)) { - return false; - } - } - } - for (int o = 0; o < layer->getNbOutputs(); o++) { - ITensor* output{layer->getOutput(o)}; - // Optional outputs are nullptr here and are from RNN layers. - if (output && !output->dynamicRangeIsSet()) { - // Pooling must have the same input and output dynamic range. - if (layer->getType() == LayerType::kPOOLING) { - if (!output->setDynamicRange(-inRange, inRange)) { - return false; - } - } else { - if (!output->setDynamicRange(-outRange, outRange)) { - return false; - } - } - } - } - } - return true; -} - -// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0. -template -void sparsify(const T* values, int64_t count, int32_t k, int32_t rs, - std::vector& sparseWeights) { - const auto c = count / (k * rs); - sparseWeights.resize(count * sizeof(T)); - auto* sparseValues = reinterpret_cast(sparseWeights.data()); - - constexpr int32_t window = 4; - constexpr int32_t nonzeros = 2; - - const int32_t crs = c * rs; - const auto getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { - return ki * crs + ci * rs + rsi; - }; - - for (int64_t ki = 0; ki < k; ++ki) { - for (int64_t rsi = 0; rsi < rs; ++rsi) { - int32_t w = 0; - int32_t nz = 0; - for (int64_t ci = 0; ci < c; ++ci) { - const auto index = getIndex(ki, ci, rsi); - if (nz < nonzeros) { - sparseValues[index] = values[index]; - ++nz; - } else { - sparseValues[index] = 0; - } - if (++w == window) { - w = 0; - nz = 0; - } - } - } - } -} - -void sparsify(const Weights& weights, int32_t k, int32_t rs, - std::vector& sparseWeights) { - switch (weights.type) { - case DataType::kFLOAT: - sparsify(static_cast(weights.values), weights.count, k, rs, - sparseWeights); - break; - case DataType::kHALF: - sparsify(static_cast(weights.values), - weights.count, k, rs, sparseWeights); - break; - case DataType::kINT8: - case DataType::kINT32: - case DataType::kBOOL: - break; - } -} - -template -void setSparseWeights(L& l, int32_t k, int32_t rs, - std::vector& sparseWeights) { - auto weights = l.getKernelWeights(); - sparsify(weights, k, rs, sparseWeights); - weights.values = sparseWeights.data(); - l.setKernelWeights(weights); -} - -template -void transpose2DWeights(void* dst, void const* src, int32_t const m, - int32_t const n) { - ASSERT(dst != src); - T* tdst = reinterpret_cast(dst); - T const* tsrc = reinterpret_cast(src); - for (int32_t mi = 0; mi < m; ++mi) { - for (int32_t ni = 0; ni < n; ++ni) { - int32_t const isrc = mi * n + ni; - int32_t const idst = ni * m + mi; - tdst[idst] = tsrc[isrc]; - } - } -} - -// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle -// layers. -// Forward analysis on the API graph to determine which weights to sparsify. -void sparsifyMatMulKernelWeights( - INetworkDefinition& network, - std::vector>& sparseWeights) { - using TensorToLayer = std::unordered_map; - using LayerToTensor = std::unordered_map; - - // 1. Collect layers and tensors information from the network. - TensorToLayer matmulI2L; - TensorToLayer constO2L; - TensorToLayer shuffleI2L; - LayerToTensor shuffleL2O; - auto collectMappingInfo = [&](int32_t const idx) { - ILayer* l = network.getLayer(idx); - switch (l->getType()) { - case LayerType::kMATRIX_MULTIPLY: { - // assume weights on the second input. - matmulI2L.insert({l->getInput(1), l}); - break; - } - case LayerType::kCONSTANT: { - DataType const dtype = static_cast(l)->getWeights().type; - if (dtype == DataType::kFLOAT || dtype == DataType::kHALF) { - // Sparsify float only. - constO2L.insert({l->getOutput(0), l}); - } - break; - } - case LayerType::kSHUFFLE: { - shuffleI2L.insert({l->getInput(0), l}); - shuffleL2O.insert({l, l->getOutput(0)}); - break; - } - default: - break; - } - }; - int32_t const nbLayers = network.getNbLayers(); - for (int32_t i = 0; i < nbLayers; ++i) { - collectMappingInfo(i); - } - if (matmulI2L.size() == 0 || constO2L.size() == 0) { - // No MatrixMultiply or Constant layer found, no weights to sparsify. - return; - } - - // Helper for analysis - auto isTranspose = [](Permutation const& perm) -> bool { - return (perm.order[0] == 1 && perm.order[1] == 0); - }; - auto is2D = [](Dims const& dims) -> bool { return dims.nbDims == 2; }; - auto isIdenticalReshape = [](Dims const& dims) -> bool { - for (int32_t i = 0; i < dims.nbDims; ++i) { - if (dims.d[i] != i || dims.d[i] != -1) { - return false; - } - } - return true; - }; - auto tensorReachedViaTranspose = [&](ITensor* t, - bool& needTranspose) -> ITensor* { - while (shuffleI2L.find(t) != shuffleI2L.end()) { - IShuffleLayer* s = static_cast(shuffleI2L.at(t)); - if (!is2D(s->getInput(0)->getDimensions()) || - !is2D(s->getReshapeDimensions()) || - !isIdenticalReshape(s->getReshapeDimensions())) { - break; - } - - if (isTranspose(s->getFirstTranspose())) { - needTranspose = !needTranspose; - } - if (isTranspose(s->getSecondTranspose())) { - needTranspose = !needTranspose; - } - - t = shuffleL2O.at(s); - } - return t; - }; - - // 2. Forward analysis to collect the Constant layers connected to MatMul via - // Transpose - std::unordered_map constantLayerToSparse; - for (auto& o2l : constO2L) { - // If need to transpose the weights of the Constant layer. - // Need to transpose by default due to semantic difference. - bool needTranspose{true}; - ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); - if (matmulI2L.find(t) == matmulI2L.end()) { - continue; - } - - // check MatMul params... - IMatrixMultiplyLayer* mm = - static_cast(matmulI2L.at(t)); - bool const twoInputs = mm->getNbInputs() == 2; - bool const all2D = is2D(mm->getInput(0)->getDimensions()) && - is2D(mm->getInput(1)->getDimensions()); - bool const isSimple = mm->getOperation(0) == MatrixOperation::kNONE && - mm->getOperation(1) != MatrixOperation::kVECTOR; - if (!(twoInputs && all2D && isSimple)) { - continue; - } - if (mm->getOperation(1) == MatrixOperation::kTRANSPOSE) { - needTranspose = !needTranspose; - } - - constantLayerToSparse.insert( - {static_cast(o2l.second), needTranspose}); - } - - // 3. Finally, sparsify the weights - auto sparsifyConstantWeights = [&sparseWeights](IConstantLayer* layer, - bool const needTranspose) { - Dims dims = layer->getOutput(0)->getDimensions(); - ASSERT(dims.nbDims == 2); - int32_t const idxN = needTranspose ? 1 : 0; - int32_t const n = dims.d[idxN]; - int32_t const k = dims.d[1 - idxN]; - sparseWeights.emplace_back(); - std::vector& spw = sparseWeights.back(); - Weights w = layer->getWeights(); - DataType const dtype = w.type; - ASSERT(dtype == DataType::kFLOAT || - dtype == - DataType::kHALF); // non-float weights should have been ignored. - - if (needTranspose) { - if (dtype == DataType::kFLOAT) { - spw.resize(w.count * sizeof(float)); - transpose2DWeights(spw.data(), w.values, k, n); - } else if (dtype == DataType::kHALF) { - spw.resize(w.count * sizeof(half_float::half)); - transpose2DWeights(spw.data(), w.values, k, n); - } - - w.values = spw.data(); - std::vector tmpW; - sparsify(w, n, 1, tmpW); - - if (dtype == DataType::kFLOAT) { - transpose2DWeights(spw.data(), tmpW.data(), n, k); - } else if (dtype == DataType::kHALF) { - transpose2DWeights(spw.data(), tmpW.data(), n, k); - } - } else { - sparsify(w, n, 1, spw); - } - - w.values = spw.data(); - layer->setWeights(w); - }; - for (auto& l : constantLayerToSparse) { - sparsifyConstantWeights(l.first, l.second); - } -} - -void sparsify(INetworkDefinition& network, - std::vector>& sparseWeights) { - for (int32_t l = 0; l < network.getNbLayers(); ++l) { - auto* layer = network.getLayer(l); - const auto t = layer->getType(); - if (t == LayerType::kCONVOLUTION) { - auto& conv = *static_cast(layer); - const auto& dims = conv.getKernelSizeNd(); - if (dims.nbDims > 2) { - continue; - } - const auto k = conv.getNbOutputMaps(); - const auto rs = dims.d[0] * dims.d[1]; - sparseWeights.emplace_back(); - setSparseWeights(conv, k, rs, sparseWeights.back()); - } else if (t == LayerType::kFULLY_CONNECTED) { - auto& fc = *static_cast(layer); - const auto k = fc.getNbOutputChannels(); - sparseWeights.emplace_back(); - setSparseWeights(fc, k, 1, sparseWeights.back()); - } - } - - sparsifyMatMulKernelWeights(network, sparseWeights); -} - -void setLayerPrecisions(INetworkDefinition& network, - LayerPrecisions const& layerPrecisions) { - bool const hasGlobalPrecision{layerPrecisions.find("*") != - layerPrecisions.end()}; - auto const globalPrecision = - hasGlobalPrecision ? layerPrecisions.at("*") : nvinfer1::DataType::kFLOAT; - bool hasLayerPrecisionSkipped{false}; - for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) { - auto* layer = network.getLayer(layerIdx); - auto const layerName = layer->getName(); - if (layerPrecisions.find(layer->getName()) != layerPrecisions.end()) { - layer->setPrecision(layerPrecisions.at(layer->getName())); - } else if (hasGlobalPrecision) { - // We should not set the layer precision if its default precision is INT32 - // or Bool. - if (layer->getPrecision() == nvinfer1::DataType::kINT32 || - layer->getPrecision() == nvinfer1::DataType::kBOOL) { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " - << layerName << " because the " - << " default layer precision is INT32 or Bool." - << std::endl; - continue; - } - // We should not set the constant layer precision if its weights are in - // INT32. - if (layer->getType() == nvinfer1::LayerType::kCONSTANT && - static_cast(layer)->getWeights().type == - nvinfer1::DataType::kINT32) { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " - << layerName << " because this " - << "constant layer has INT32 weights." << std::endl; - continue; - } - // We should not set the layer precision if the layer operates on a shape - // tensor. - if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor()) { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " - << layerName << " because this layer " - << "operates on a shape tensor." << std::endl; - continue; - } - if ((layer->getType() == nvinfer1::LayerType::kIDENTITY || - layer->getType() == nvinfer1::LayerType::kSHUFFLE) && - layer->getNbInputs() >= 1 && - layer->getInput(0)->getType() == nvinfer1::DataType::kINT32 && - layer->getNbOutputs() >= 1 && - layer->getOutput(0)->getType() == nvinfer1::DataType::kINT32) { - hasLayerPrecisionSkipped = true; - sample::gLogVerbose << "Skipped setting precision for layer " - << layerName << " because this " - << "layer has INT32 input and output." << std::endl; - continue; - } - // All heuristics passed. Set the layer precision. - layer->setPrecision(globalPrecision); - } - } - - if (hasLayerPrecisionSkipped) { - sample::gLogInfo << "Skipped setting precisions for some layers. Check " - "verbose logs for more details." - << std::endl; - } -} - -void setLayerOutputTypes(INetworkDefinition& network, - LayerOutputTypes const& layerOutputTypes) { - bool const hasGlobalOutputType{layerOutputTypes.find("*") != - layerOutputTypes.end()}; - auto const globalOutputType = hasGlobalOutputType - ? layerOutputTypes.at("*").at(0) - : nvinfer1::DataType::kFLOAT; - bool hasLayerOutputTypeSkipped{false}; - for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) { - auto* layer = network.getLayer(layerIdx); - auto const layerName = layer->getName(); - auto const nbOutputs = layer->getNbOutputs(); - if (layerOutputTypes.find(layer->getName()) != layerOutputTypes.end()) { - auto const& outputTypes = layerOutputTypes.at(layer->getName()); - bool const isBroadcast = (outputTypes.size() == 1); - if (!isBroadcast && - static_cast(outputTypes.size()) != nbOutputs) { - sample::gLogError - << "Layer " << layerName << " has " << nbOutputs << " outputs but " - << outputTypes.size() - << " output types are given in --layerOutputTypes flag." - << std::endl; - throw std::invalid_argument("Invalid --layerOutputTypes flag."); - } - for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) { - layer->setOutputType(outputIdx, - outputTypes.at(isBroadcast ? 0 : outputIdx)); - } - } else if (hasGlobalOutputType) { - // We should not set the layer output types if its default precision is - // INT32 or Bool. - if (layer->getPrecision() == nvinfer1::DataType::kINT32 || - layer->getPrecision() == nvinfer1::DataType::kBOOL) { - hasLayerOutputTypeSkipped = true; - sample::gLogVerbose << "Skipped setting output types for layer " - << layerName << " because the " - << " default layer precision is INT32 or Bool." - << std::endl; - continue; - } - // We should not set the constant layer output types if its weights are in - // INT32. - if (layer->getType() == nvinfer1::LayerType::kCONSTANT && - static_cast(layer)->getWeights().type == - nvinfer1::DataType::kINT32) { - hasLayerOutputTypeSkipped = true; - sample::gLogVerbose << "Skipped setting output types for layer " - << layerName << " because this " - << "constant layer has INT32 weights." << std::endl; - continue; - } - for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) { - // We should not set the output type if the output is a shape tensor. - if (layer->getOutput(0)->isShapeTensor()) { - hasLayerOutputTypeSkipped = true; - sample::gLogVerbose << "Skipped setting output type for output " - << outputIdx << " of layer " << layerName - << " because it is a shape tensor." << std::endl; - continue; - } - layer->setOutputType(outputIdx, globalOutputType); - } - } - } - - if (hasLayerOutputTypeSkipped) { - sample::gLogInfo << "Skipped setting output types for some layers. Check " - "verbose logs for more details." - << std::endl; - } -} - -void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) { - auto const roundToBytes = [](double const sizeInMB) { - return static_cast(sizeInMB * (1 << 20)); - }; - if (build.workspace >= 0) { - config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, - roundToBytes(build.workspace)); - } - if (build.dlaSRAM >= 0) { - config.setMemoryPoolLimit(MemoryPoolType::kDLA_MANAGED_SRAM, - roundToBytes(build.dlaSRAM)); - } - if (build.dlaLocalDRAM >= 0) { - config.setMemoryPoolLimit(MemoryPoolType::kDLA_LOCAL_DRAM, - roundToBytes(build.dlaLocalDRAM)); - } - if (build.dlaGlobalDRAM >= 0) { - config.setMemoryPoolLimit(MemoryPoolType::kDLA_GLOBAL_DRAM, - roundToBytes(build.dlaGlobalDRAM)); - } -} - -} // namespace - -bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, - IBuilder& builder, INetworkDefinition& network, - IBuilderConfig& config, std::ostream& err, - std::vector>& sparseWeights) { - IOptimizationProfile* profile{nullptr}; - if (build.maxBatch) { - builder.setMaxBatchSize(build.maxBatch); - } else { - profile = builder.createOptimizationProfile(); - } - - bool hasDynamicShapes{false}; - - bool broadcastInputFormats = - broadcastIOFormats(build.inputFormats, network.getNbInputs()); - - if (profile) { - // Check if the provided input tensor names match the input tensors of the - // engine. - // Throw an error if the provided input tensor names cannot be found because - // it implies a potential typo. - for (const auto& shape : build.shapes) { - bool tensorNameFound{false}; - for (int32_t i = 0; i < network.getNbInputs(); ++i) { - if (network.getInput(i)->getName() == shape.first) { - tensorNameFound = true; - break; - } - } - if (!tensorNameFound) { - sample::gLogError - << "Cannot find input tensor with name \"" << shape.first - << "\" in the network " - << "inputs! Please make sure the input tensor names are correct." - << std::endl; - return false; - } - } - } - - for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) { - // Set formats and data types of inputs - auto* input = network.getInput(i); - if (!build.inputFormats.empty()) { - int inputFormatIndex = broadcastInputFormats ? 0 : i; - input->setType(build.inputFormats[inputFormatIndex].first); - input->setAllowedFormats(build.inputFormats[inputFormatIndex].second); - } else { - switch (input->getType()) { - case DataType::kINT32: - case DataType::kBOOL: - case DataType::kHALF: - // Leave these as is. - break; - case DataType::kFLOAT: - case DataType::kINT8: - // User did not specify a floating-point format. Default to kFLOAT. - input->setType(DataType::kFLOAT); - break; - } - input->setAllowedFormats(1U << static_cast(TensorFormat::kLINEAR)); - } - - if (profile) { - auto const dims = input->getDimensions(); - auto const isScalar = dims.nbDims == 0; - auto const isDynamicInput = - std::any_of(dims.d, dims.d + dims.nbDims, - [](int32_t dim) { return dim == -1; }) || - input->isShapeTensor(); - if (isDynamicInput) { - hasDynamicShapes = true; - auto shape = build.shapes.find(input->getName()); - ShapeRange shapes{}; - - // If no shape is provided, set dynamic dimensions to 1. - if (shape == build.shapes.end()) { - constexpr int DEFAULT_DIMENSION = 1; - std::vector staticDims; - if (input->isShapeTensor()) { - if (isScalar) { - staticDims.push_back(1); - } else { - staticDims.resize(dims.d[0]); - std::fill(staticDims.begin(), staticDims.end(), - DEFAULT_DIMENSION); - } - } else { - staticDims.resize(dims.nbDims); - std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), - [&](int dimension) { - return dimension > 0 ? dimension - : DEFAULT_DIMENSION; - }); - } - sample::gLogWarning - << "Dynamic dimensions required for input: " << input->getName() - << ", but no shapes were provided. Automatically overriding " - "shape to: " - << staticDims << std::endl; - std::fill(shapes.begin(), shapes.end(), staticDims); - } else { - shapes = shape->second; - } - - std::vector profileDims{}; - if (input->isShapeTensor()) { - profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues( - input->getName(), OptProfileSelector::kMIN, - profileDims.data(), - static_cast(profileDims.size())), - "Error in set shape values MIN", false, err); - profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues( - input->getName(), OptProfileSelector::kOPT, - profileDims.data(), - static_cast(profileDims.size())), - "Error in set shape values OPT", false, err); - profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; - SMP_RETVAL_IF_FALSE(profile->setShapeValues( - input->getName(), OptProfileSelector::kMAX, - profileDims.data(), - static_cast(profileDims.size())), - "Error in set shape values MAX", false, err); - } else { - profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; - SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), OptProfileSelector::kMIN, - toDims(profileDims)), - "Error in set dimensions to profile MIN", false, err); - profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; - SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), OptProfileSelector::kOPT, - toDims(profileDims)), - "Error in set dimensions to profile OPT", false, err); - profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; - SMP_RETVAL_IF_FALSE( - profile->setDimensions(input->getName(), OptProfileSelector::kMAX, - toDims(profileDims)), - "Error in set dimensions to profile MAX", false, err); - } - } - } - } - - if (!hasDynamicShapes && !build.shapes.empty()) { - sample::gLogError << "Static model does not take explicit shapes since the " - "shape of inference tensors will be " - "determined by the model itself" - << std::endl; - return false; - } - - if (profile && hasDynamicShapes) { - SMP_RETVAL_IF_FALSE(profile->isValid(), - "Required optimization profile is invalid", false, err); - SMP_RETVAL_IF_FALSE(config.addOptimizationProfile(profile) != -1, - "Error in add optimization profile", false, err); - } - - bool broadcastOutputFormats = - broadcastIOFormats(build.outputFormats, network.getNbOutputs(), false); - - for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) { - // Set formats and data types of outputs - auto* output = network.getOutput(i); - if (!build.outputFormats.empty()) { - int outputFormatIndex = broadcastOutputFormats ? 0 : i; - output->setType(build.outputFormats[outputFormatIndex].first); - output->setAllowedFormats(build.outputFormats[outputFormatIndex].second); - } else { - output->setAllowedFormats(1U << static_cast(TensorFormat::kLINEAR)); - } - } - - setMemoryPoolLimits(config, build); - - if (build.timingCacheMode == TimingCacheMode::kDISABLE) { - config.setFlag(BuilderFlag::kDISABLE_TIMING_CACHE); - } - - if (!build.tf32) { - config.clearFlag(BuilderFlag::kTF32); - } - - if (build.refittable) { - config.setFlag(BuilderFlag::kREFIT); - } - - if (build.sparsity != SparsityFlag::kDISABLE) { - config.setFlag(BuilderFlag::kSPARSE_WEIGHTS); - if (build.sparsity == SparsityFlag::kFORCE) { - sparsify(network, sparseWeights); - } - } - - config.setProfilingVerbosity(build.profilingVerbosity); - config.setMinTimingIterations(build.minTiming); - config.setAvgTimingIterations(build.avgTiming); - - if (build.fp16) { - config.setFlag(BuilderFlag::kFP16); - } - - if (build.int8) { - config.setFlag(BuilderFlag::kINT8); - } - - if (build.int8 && !build.fp16) { - sample::gLogInfo << "FP32 and INT8 precisions have been specified - more " - "performance might be enabled by additionally " - "specifying --fp16 or --best" - << std::endl; - } - - auto isInt8 = [](const IOFormat& format) { - return format.first == DataType::kINT8; - }; - auto int8IO = std::count_if(build.inputFormats.begin(), - build.inputFormats.end(), isInt8) + - std::count_if(build.outputFormats.begin(), - build.outputFormats.end(), isInt8); - - auto hasQDQLayers = [](INetworkDefinition& network) { - // Determine if our network has QDQ layers. - const auto nbLayers = network.getNbLayers(); - for (int32_t i = 0; i < nbLayers; i++) { - const auto& layer = network.getLayer(i); - if (layer->getType() == LayerType::kQUANTIZE || - layer->getType() == LayerType::kDEQUANTIZE) { - return true; - } - } - return false; - }; - - if (!hasQDQLayers(network) && (build.int8 || int8IO) && - build.calibration.empty()) { - // Explicitly set int8 scales if no calibrator is provided and if I/O - // tensors use int8, - // because auto calibration does not support this case. - SMP_RETVAL_IF_FALSE(setTensorDynamicRange(network), - "Error in set tensor dynamic range.", false, err); - } else if (build.int8) { - if (!hasQDQLayers(network) && int8IO) { - try { - // Set dynamic ranges of int8 inputs / outputs to match scales loaded - // from calibration cache - // TODO http://nvbugs/3262234 Change the network validation so that this - // workaround can be removed - setTensorScalesFromCalibration(network, build.inputFormats, - build.outputFormats, build.calibration); - } catch (std::exception&) { - sample::gLogError << "Int8IO was specified but impossible to read " - "tensor scales from provided calibration cache " - "file" - << std::endl; - return false; - } - } - IOptimizationProfile* profileCalib{nullptr}; - if (!build.shapesCalib.empty()) { - profileCalib = builder.createOptimizationProfile(); - for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) { - auto* input = network.getInput(i); - Dims profileDims{}; - auto shape = build.shapesCalib.find(input->getName()); - ShapeRange shapesCalib{}; - shapesCalib = shape->second; - - profileDims = - toDims(shapesCalib[static_cast(OptProfileSelector::kOPT)]); - // Here we check only kMIN as all profileDims are the same. - SMP_RETVAL_IF_FALSE( - profileCalib->setDimensions(input->getName(), - OptProfileSelector::kMIN, profileDims), - "Error in set dimensions to calibration profile OPT", false, err); - profileCalib->setDimensions(input->getName(), OptProfileSelector::kOPT, - profileDims); - profileCalib->setDimensions(input->getName(), OptProfileSelector::kMAX, - profileDims); - } - SMP_RETVAL_IF_FALSE(profileCalib->isValid(), - "Calibration profile is invalid", false, err); - SMP_RETVAL_IF_FALSE(config.setCalibrationProfile(profileCalib), - "Error in set calibration profile", false, err); - } - - std::vector elemCount{}; - for (int i = 0; i < network.getNbInputs(); i++) { - auto* input = network.getInput(i); - auto const dims = input->getDimensions(); - auto const isDynamicInput = std::any_of( - dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); - - if (profileCalib) { - elemCount.push_back(volume(profileCalib->getDimensions( - input->getName(), OptProfileSelector::kOPT))); - } else if (profile && isDynamicInput) { - elemCount.push_back(volume(profile->getDimensions( - input->getName(), OptProfileSelector::kOPT))); - } else { - elemCount.push_back(volume(input->getDimensions())); - } - } - - config.setInt8Calibrator( - new RndInt8Calibrator(1, elemCount, build.calibration, network, err)); - } - - if (build.directIO) { - config.setFlag(BuilderFlag::kDIRECT_IO); - } - - switch (build.precisionConstraints) { - case PrecisionConstraints::kNONE: - // It's the default for TensorRT. - break; - case PrecisionConstraints::kOBEY: - config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); - break; - case PrecisionConstraints::kPREFER: - config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); - break; - } - - if (!build.layerPrecisions.empty() && - build.precisionConstraints != PrecisionConstraints::kNONE) { - setLayerPrecisions(network, build.layerPrecisions); - } - - if (!build.layerOutputTypes.empty() && - build.precisionConstraints != PrecisionConstraints::kNONE) { - setLayerOutputTypes(network, build.layerOutputTypes); - } - - if (build.safe) { - config.setEngineCapability(sys.DLACore != -1 - ? EngineCapability::kDLA_STANDALONE - : EngineCapability::kSAFETY); - } - - if (build.restricted) { - config.setFlag(BuilderFlag::kSAFETY_SCOPE); - } - - if (sys.DLACore != -1) { - if (sys.DLACore < builder.getNbDLACores()) { - config.setDefaultDeviceType(DeviceType::kDLA); - config.setDLACore(sys.DLACore); - config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); - - if (sys.fallback) { - config.setFlag(BuilderFlag::kGPU_FALLBACK); - } else { - // Reformatting runs on GPU, so avoid I/O reformatting. - config.setFlag(BuilderFlag::kDIRECT_IO); - } - if (!build.int8) { - config.setFlag(BuilderFlag::kFP16); - } - } else { - err << "Cannot create DLA engine, " << sys.DLACore << " not available" - << std::endl; - return false; - } - } - - if (build.enabledTactics || build.disabledTactics) { - TacticSources tacticSources = config.getTacticSources(); - tacticSources |= build.enabledTactics; - tacticSources &= ~build.disabledTactics; - config.setTacticSources(tacticSources); - } - - return true; -} - -//! -//! \brief Create an engine for a network defintion -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -bool networkToEngine(const BuildOptions& build, const SystemOptions& sys, - IBuilder& builder, BuildEnvironment& env, - std::ostream& err) { - TrtUniquePtr config{builder.createBuilderConfig()}; - std::vector> sparseWeights; - SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err); - SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, *env.network, - *config, err, sparseWeights), - "Network And Config setup failed", false, err); - - std::unique_ptr timingCache{nullptr}; - // Try to load cache from file. Create a fresh cache if the file doesn't exist - if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - std::vector loadedCache = loadTimingCacheFile(build.timingCacheFile); - timingCache.reset(config->createTimingCache( - static_cast(loadedCache.data()), loadedCache.size())); - SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", - false, err); - config->setTimingCache(*timingCache, false); - } - - // CUDA stream used for profiling by the builder. - auto profileStream = samplesCommon::makeCudaStream(); - SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", - false, err); - config->setProfileStream(*profileStream); - - TrtUniquePtr serializedEngine{ - builder.buildSerializedNetwork(*env.network, *config)}; - SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, - "Engine could not be created from network", false, err); - - env.engineBlob.resize(serializedEngine->size()); - std::memcpy(env.engineBlob.data(), serializedEngine->data(), - serializedEngine->size()); - - if (build.safe) { - ASSERT(sample::hasSafeRuntime()); - std::unique_ptr safeRuntime{ - sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(safeRuntime != nullptr, "SafeRuntime creation failed", - false, err); - safeRuntime->setErrorRecorder(&gRecorder); - env.safeEngine.reset(safeRuntime->deserializeCudaEngine( - serializedEngine->data(), serializedEngine->size())); - if (build.consistency) { - checkSafeEngine(serializedEngine->data(), serializedEngine->size()); - } - SMP_RETVAL_IF_FALSE(env.safeEngine != nullptr, - "SafeEngine deserialization failed", false, err); - } else { - TrtUniquePtr runtime{ - createInferRuntime(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(runtime != nullptr, "Runtime creation failed", false, - err); - runtime->setErrorRecorder(&gRecorder); - env.engine.reset(runtime->deserializeCudaEngine(serializedEngine->data(), - serializedEngine->size())); - SMP_RETVAL_IF_FALSE(env.engine != nullptr, "Engine deserialization failed", - false, err); - if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - auto const& timingCache = config->getTimingCache(); - std::unique_ptr timingCacheHostData{ - timingCache->serialize()}; - SMP_RETVAL_IF_FALSE(timingCacheHostData != nullptr, - "Timing Cache serialization failed", false, err); - saveTimingCacheFile(build.timingCacheFile, timingCacheHostData.get()); - } - if (config->getInt8Calibrator()) { - delete config->getInt8Calibrator(); - } - } - return true; -} - -//! -//! \brief Parse a given model, create a network and an engine. -//! -bool modelToBuildEnv(const ModelOptions& model, const BuildOptions& build, - const SystemOptions& sys, BuildEnvironment& env, - std::ostream& err) { - TrtUniquePtr builder{ - createInferBuilder(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", false, - err); - builder->setErrorRecorder(&gRecorder); - auto networkFlags = - (build.maxBatch) - ? 0U - : 1U << static_cast( - nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - - env.network.reset(builder->createNetworkV2(networkFlags)); - SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false, - err); - env.parser = modelToNetwork(model, *env.network, err); - SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, - err); - SMP_RETVAL_IF_FALSE(networkToEngine(build, sys, *builder, env, err), - "Building engine failed", false, err); - return true; -} - -namespace { -std::pair, std::vector> -getLayerWeightsRolePair(IRefitter& refitter) { - // Get number of refittable items. - auto const nbAll = refitter.getAll(0, nullptr, nullptr); - std::vector layerNames(nbAll); - // Allocate buffers for the items and get them. - std::vector weightsRoles(nbAll); - refitter.getAll(nbAll, layerNames.data(), weightsRoles.data()); - std::vector layerNameStrs(nbAll); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), - [](char const* name) { - if (name == nullptr) { - return std::string{}; - } - return std::string{name}; - }); - return {layerNameStrs, weightsRoles}; -} - -std::pair, std::vector> -getMissingLayerWeightsRolePair(IRefitter& refitter) { - // Get number of refittable items. - auto const nbMissing = refitter.getMissing(0, nullptr, nullptr); - std::vector layerNames(nbMissing); - // Allocate buffers for the items and get them. - std::vector weightsRoles(nbMissing); - refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); - std::vector layerNameStrs(nbMissing); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), - [](char const* name) { - if (name == nullptr) { - return std::string{}; - } - return std::string{name}; - }); - return {layerNameStrs, weightsRoles}; -} - -bool loadEngineToEnv(const std::string& engine, int DLACore, bool safe, - bool enableConsistency, BuildEnvironment& env, - std::ostream& err) { - std::ifstream engineFile(engine, std::ios::binary); - SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, - err << "Error opening engine file: " << engine); - engineFile.seekg(0, std::ifstream::end); - int64_t fsize = engineFile.tellg(); - engineFile.seekg(0, std::ifstream::beg); - - env.engineBlob.resize(fsize); - engineFile.read(reinterpret_cast(env.engineBlob.data()), fsize); - SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, - err << "Error loading engine file: " << engine); - - if (safe) { - ASSERT(sample::hasSafeRuntime()); - std::unique_ptr safeRuntime{ - sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - safeRuntime->setErrorRecorder(&gRecorder); - env.safeEngine.reset( - safeRuntime->deserializeCudaEngine(env.engineBlob.data(), fsize)); - bool result = env.safeEngine != nullptr; - if (result && enableConsistency) { - checkSafeEngine(env.engineBlob.data(), fsize); - } - return result; - } - - TrtUniquePtr runtime{ - createInferRuntime(sample::gLogger.getTRTLogger())}; - if (DLACore != -1) { - runtime->setDLACore(DLACore); - } - runtime->setErrorRecorder(&gRecorder); - env.engine.reset( - runtime->deserializeCudaEngine(env.engineBlob.data(), fsize, nullptr)); - return env.engine != nullptr; -} -} // namespace - -void dumpRefittable(nvinfer1::ICudaEngine& engine) { - TrtUniquePtr refitter{ - createInferRefitter(engine, sample::gLogger.getTRTLogger())}; - if (refitter == nullptr) { - sample::gLogError << "Failed to create a refitter." << std::endl; - return; - } - - auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); - auto const& layerNames = layerWeightsRolePair.first; - auto const& weightsRoles = layerWeightsRolePair.second; - auto const nbAll = layerWeightsRolePair.first.size(); - for (size_t i = 0; i < nbAll; ++i) { - sample::gLogInfo << layerNames[i] << " " << weightsRoles[i] << std::endl; - } -} - -ICudaEngine* loadEngine(const std::string& engine, int DLACore, - std::ostream& err) { - BuildEnvironment env; - return loadEngineToEnv(engine, DLACore, false, false, env, err) - ? env.engine.release() - : nullptr; -} - -bool saveEngine(const ICudaEngine& engine, const std::string& fileName, - std::ostream& err) { - std::ofstream engineFile(fileName, std::ios::binary); - if (!engineFile) { - err << "Cannot open engine file: " << fileName << std::endl; - return false; - } - - TrtUniquePtr serializedEngine{engine.serialize()}; - if (serializedEngine == nullptr) { - err << "Engine serialization failed" << std::endl; - return false; - } - - engineFile.write(static_cast(serializedEngine->data()), - serializedEngine->size()); - return !engineFile.fail(); -} - -bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, - const SystemOptions& sys, BuildEnvironment& env, - std::ostream& err) { - TrtUniquePtr engine; - TrtUniquePtr network; - Parser parser; - - bool createEngineSuccess{false}; - - if (build.load) { - createEngineSuccess = loadEngineToEnv(build.engine, sys.DLACore, build.safe, - build.consistency, env, err); - } else { - createEngineSuccess = modelToBuildEnv(model, build, sys, env, err); - } - - SMP_RETVAL_IF_FALSE(createEngineSuccess, - "Failed to create engine from model.", false, err); - - if (build.save) { - std::ofstream engineFile(build.engine, std::ios::binary); - engineFile.write(reinterpret_cast(env.engineBlob.data()), - env.engineBlob.size()); - SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.", - false, err); - } - return true; -} - -IHostMemory* networkToSerialized(const BuildOptions& build, - const SystemOptions& sys, IBuilder& builder, - INetworkDefinition& network, - std::ostream& err) { - TrtUniquePtr config{builder.createBuilderConfig()}; - std::vector> sparseWeights; - SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", nullptr, - err); - SMP_RETVAL_IF_FALSE(setupNetworkAndConfig(build, sys, builder, network, - *config, err, sparseWeights), - "Network And Config setup failed", nullptr, err); - return builder.buildSerializedNetwork(network, *config); -} - -IHostMemory* modelToSerialized(const ModelOptions& model, - const BuildOptions& build, - const SystemOptions& sys, std::ostream& err) { - TrtUniquePtr builder{ - createInferBuilder(sample::gLogger.getTRTLogger())}; - SMP_RETVAL_IF_FALSE(builder != nullptr, "Builder creation failed", nullptr, - err); - builder->setErrorRecorder(&gRecorder); - - auto networkFlags = - (build.maxBatch) - ? 0U - : 1U << static_cast( - nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - - TrtUniquePtr network{ - builder->createNetworkV2(networkFlags)}; - SMP_RETVAL_IF_FALSE(network != nullptr, "Network creation failed", nullptr, - err); - - Parser parser = modelToNetwork(model, *network, err); - SMP_RETVAL_IF_FALSE(parser.operator bool(), "Parsing model failed", nullptr, - err); - - return networkToSerialized(build, sys, *builder, *network, err); -} - -bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, - const SystemOptions& sys, std::ostream& err) { - TrtUniquePtr serialized{ - modelToSerialized(model, build, sys, err)}; - SMP_RETVAL_IF_FALSE(serialized != nullptr, "Network serialization failed", - false, err); - - std::ofstream engineFile(build.engine, std::ios::binary); - SMP_RETVAL_IF_FALSE(!!engineFile, - "Cannot open a file to save a serialize network", false, - err); - engineFile.write(static_cast(serialized->data()), serialized->size()); - return !engineFile.fail(); -} - -// There is not a getWeightsName API, so we need to use WeightsRole. -std::vector> -getAllRefitWeightsForLayer(const ILayer& l) { - switch (l.getType()) { - case LayerType::kCONSTANT: { - const auto& layer = static_cast(l); - return {std::make_pair(WeightsRole::kCONSTANT, layer.getWeights())}; - } - case LayerType::kCONVOLUTION: { - const auto& layer = static_cast(l); - return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case LayerType::kDECONVOLUTION: { - const auto& layer = static_cast(l); - return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case LayerType::kFULLY_CONNECTED: { - const auto& layer = static_cast(l); - return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), - std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; - } - case LayerType::kSCALE: { - const auto& layer = static_cast(l); - return {std::make_pair(WeightsRole::kSCALE, layer.getScale()), - std::make_pair(WeightsRole::kSHIFT, layer.getShift())}; - } - case LayerType::kRNN_V2: - case LayerType::kACTIVATION: - case LayerType::kPOOLING: - case LayerType::kLRN: - case LayerType::kSOFTMAX: - case LayerType::kSHUFFLE: - case LayerType::kCONCATENATION: - case LayerType::kELEMENTWISE: - case LayerType::kPLUGIN: - case LayerType::kUNARY: - case LayerType::kPADDING: - case LayerType::kREDUCE: - case LayerType::kTOPK: - case LayerType::kGATHER: - case LayerType::kMATRIX_MULTIPLY: - case LayerType::kRAGGED_SOFTMAX: - case LayerType::kIDENTITY: - case LayerType::kPLUGIN_V2: - case LayerType::kSLICE: - case LayerType::kFILL: - case LayerType::kSHAPE: - case LayerType::kPARAMETRIC_RELU: - case LayerType::kRESIZE: - case LayerType::kTRIP_LIMIT: - case LayerType::kRECURRENCE: - case LayerType::kITERATOR: - case LayerType::kLOOP_OUTPUT: - case LayerType::kSELECT: - case LayerType::kQUANTIZE: - case LayerType::kDEQUANTIZE: - case LayerType::kCONDITION: - case LayerType::kCONDITIONAL_INPUT: - case LayerType::kCONDITIONAL_OUTPUT: - case LayerType::kSCATTER: - case LayerType::kEINSUM: - case LayerType::kASSERTION: - return {}; - } - return {}; -} - -bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, - bool multiThreading) { - using time_point = std::chrono::time_point; - using durationMs = std::chrono::duration; - - auto const nbLayers = network.getNbLayers(); - TrtUniquePtr refitter{ - createInferRefitter(engine, sample::gLogger.getTRTLogger())}; - // Set max threads that can be used by refitter. - if (multiThreading && !refitter->setMaxThreads(10)) { - sample::gLogError << "Failed to set max threads to refitter." << std::endl; - return false; - } - auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); - // We use std::string instead of const char* since we can have copies of layer - // names. - std::set> layerRoleSet; - - auto const& layerNames = layerWeightsRolePair.first; - auto const& weightsRoles = layerWeightsRolePair.second; - - std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(), - std::inserter(layerRoleSet, layerRoleSet.begin()), - [](std::string const& layerName, WeightsRole const role) { - return std::make_pair(layerName, role); - }); - - auto const isRefittable = [&layerRoleSet](char const* layerName, - WeightsRole const role) { - return layerRoleSet.find(std::make_pair(layerName, role)) != - layerRoleSet.end(); - }; - - auto const setWeights = [&] { - for (int32_t i = 0; i < nbLayers; i++) { - auto const layer = network.getLayer(i); - auto const roleWeightsVec = getAllRefitWeightsForLayer(*layer); - for (auto const& roleWeights : roleWeightsVec) { - if (isRefittable(layer->getName(), roleWeights.first)) { - bool const success = refitter->setWeights( - layer->getName(), roleWeights.first, roleWeights.second); - if (!success) { - return false; - } - } - } - } - return true; - }; - - auto const reportMissingWeights = [&] { - auto const& missingPair = getMissingLayerWeightsRolePair(*refitter); - auto const& layerNames = missingPair.first; - auto const& weightsRoles = missingPair.second; - for (size_t i = 0; i < layerNames.size(); ++i) { - sample::gLogError << "Missing (" << layerNames[i] << ", " - << weightsRoles[i] << ") for refitting." << std::endl; - } - return layerNames.empty(); - }; - - // Warm up and report missing weights - bool const success = - setWeights() && reportMissingWeights() && refitter->refitCudaEngine(); - if (!success) { - return false; - } - - constexpr int32_t loop = 10; - time_point const refitStartTime{std::chrono::steady_clock::now()}; - { - for (int32_t l = 0; l < loop; l++) { - bool const success = setWeights() && refitter->refitCudaEngine(); - if (!success) { - return false; - } - } - } - time_point const refitEndTime{std::chrono::steady_clock::now()}; - - sample::gLogInfo << "Engine refitted" - << " in " - << durationMs(refitEndTime - refitStartTime).count() / loop - << " ms." << std::endl; - return true; -} - -namespace { -void* initSafeRuntime() { - void* handle{nullptr}; -#if !defined(_WIN32) - std::string const dllName{samplesCommon::isDebug() - ? "libnvinfer_safe_debug.so.8" - : "libnvinfer_safe.so.8"}; -#if SANITIZER_BUILD - handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); -#else - handle = dlopen(dllName.c_str(), RTLD_LAZY); -#endif -#endif - return handle; -} - -void* initConsistencyCheckerLibrary() { - void* handle{nullptr}; -#if !defined(_WIN32) - std::string const dllName{samplesCommon::isDebug() - ? "libnvinfer_checker_debug.so.8" - : "libnvinfer_checker.so.8"}; -#if SANITIZER_BUILD - handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); -#else - handle = dlopen(dllName.c_str(), RTLD_LAZY); -#endif -#endif - return handle; -} - -#if !defined(_WIN32) -struct DllDeleter { - void operator()(void* handle) { - if (handle != nullptr) { - dlclose(handle); - } - } -}; -const std::unique_ptr safeRuntimeLibrary{initSafeRuntime()}; -const std::unique_ptr consistencyCheckerLibrary{ - initConsistencyCheckerLibrary()}; -#endif -} // namespace - -bool hasSafeRuntime() { - bool ret{false}; -#if !defined(_WIN32) - ret = (safeRuntimeLibrary != nullptr); -#endif - return ret; -} - -nvinfer1::safe::IRuntime* -createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept { - nvinfer1::safe::IRuntime* runtime{nullptr}; -#if !defined(_WIN32) - constexpr char symbolName[] = - "_ZN8nvinfer14safe18createInferRuntimeERNS_7ILoggerE"; - typedef nvinfer1::safe::IRuntime* (*CreateInferRuntimeFn)(nvinfer1::ILogger & - logger); - if (hasSafeRuntime()) { - auto createFn = reinterpret_cast( - dlsym(safeRuntimeLibrary.get(), symbolName)); - if (createFn != nullptr) { - runtime = createFn(logger); - } - } -#endif - return runtime; -} - -bool hasConsistencyChecker() { - bool ret{false}; -#if !defined(_WIN32) - ret = (consistencyCheckerLibrary != nullptr); -#endif - return ret; -} - -nvinfer1::consistency::IConsistencyChecker* -createConsistencyChecker(nvinfer1::ILogger& logger, - void const* serializedEngine, - int32_t const engineSize) noexcept { - nvinfer1::consistency::IConsistencyChecker* checker{nullptr}; - - if (serializedEngine == nullptr || engineSize == 0) { - return checker; - } - -#if !defined(_WIN32) - constexpr char symbolName[] = "createConsistencyChecker_INTERNAL"; - typedef nvinfer1::consistency::IConsistencyChecker* (*CreateCheckerFn)( - nvinfer1::ILogger * logger, void const* data, size_t size, - uint32_t version); - if (hasSafeRuntime()) { - auto createFn = reinterpret_cast( - dlsym(consistencyCheckerLibrary.get(), symbolName)); - if (createFn != nullptr) { - checker = - createFn(&logger, serializedEngine, engineSize, NV_TENSORRT_VERSION); - } - } -#endif - return checker; -} - -bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize) { - if (!hasConsistencyChecker()) { - sample::gLogError << "Cannot perform consistency check because the checker " - "is not loaded.." - << std::endl; - return false; - } - auto checker = std::unique_ptr( - createConsistencyChecker(sample::gLogger.getTRTLogger(), serializedEngine, - engineSize)); - if (checker.get() == nullptr) { - sample::gLogError << "Failed to create consistency checker." << std::endl; - return false; - } - sample::gLogInfo << "Start consistency checking." << std::endl; - if (!checker->validate()) { - sample::gLogError << "Consistency validation failed." << std::endl; - return false; - } - sample::gLogInfo << "Consistency validation passed." << std::endl; - return true; -} -} // namespace sample diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h deleted file mode 100644 index 1b7b7a000..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleEngines.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_ENGINES_H -#define TRT_SAMPLE_ENGINES_H - -#include -#include - -//#include "NvCaffeParser.h" -#include "NvInfer.h" -#include "NvInferConsistency.h" -#include "NvInferSafeRuntime.h" -#include "NvOnnxParser.h" -#include "sampleOptions.h" -#include "sampleUtils.h" - -namespace sample { - -struct Parser { -// TrtUniquePtr caffeParser; - TrtUniquePtr onnxParser; - - operator bool() const { return false || onnxParser; } -}; - -struct BuildEnvironment { - TrtUniquePtr network; - //! Parser that creates the network. Must be declared *after* network, so that - //! when - //! ~BuildEnvironment() executes, the parser is destroyed before the network - //! is destroyed. - Parser parser; - TrtUniquePtr engine; - std::unique_ptr safeEngine; - std::vector engineBlob; -}; - -//! -//! \brief Generate a network definition for a given model -//! -//! \return Parser The parser used to initialize the network and that holds the -//! weights for the network, or an invalid -//! parser (the returned parser converts to false if tested) -//! -//! Constant input dimensions in the model must not be changed in the -//! corresponding -//! network definition, because its correctness may rely on the constants. -//! -//! \see Parser::operator bool() -//! -Parser modelToNetwork(const ModelOptions& model, - nvinfer1::INetworkDefinition& network, std::ostream& err); - -//! -//! \brief Set up network and config -//! -//! \return boolean Return true if network and config were successfully set -//! -bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, - IBuilder& builder, INetworkDefinition& network, - IBuilderConfig& config, std::ostream& err, - std::vector>& sparseWeights); - -//! -//! \brief Log refittable layers and weights of a refittable engine -//! -void dumpRefittable(nvinfer1::ICudaEngine& engine); - -//! -//! \brief Load a serialized engine -//! -//! \return Pointer to the engine loaded or nullptr if the operation failed -//! -nvinfer1::ICudaEngine* loadEngine(const std::string& engine, int DLACore, - std::ostream& err); - -//! -//! \brief Save an engine into a file -//! -//! \return boolean Return true if the engine was successfully saved -//! -bool saveEngine(const nvinfer1::ICudaEngine& engine, - const std::string& fileName, std::ostream& err); - -//! -//! \brief Create an engine from model or serialized file, and optionally save -//! engine -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -bool getEngineBuildEnv(const ModelOptions& model, const BuildOptions& build, - const SystemOptions& sys, BuildEnvironment& env, - std::ostream& err); - -//! -//! \brief Create an engine from model or serialized file, and optionally save -//! engine -//! -//! \return Pointer to the engine created or nullptr if the creation failed -//! -inline TrtUniquePtr getEngine(const ModelOptions& model, - const BuildOptions& build, - const SystemOptions& sys, - std::ostream& err) { - BuildEnvironment env; - TrtUniquePtr engine; - if (getEngineBuildEnv(model, build, sys, env, err)) { - engine.swap(env.engine); - } - return engine; -} - -//! -//! \brief Create a serialized network -//! -//! \return Pointer to a host memory for a serialized network -//! -IHostMemory* networkToSerialized(const BuildOptions& build, - const SystemOptions& sys, IBuilder& builder, - INetworkDefinition& network, - std::ostream& err); - -//! -//! \brief Tranfer model to a serialized network -//! -//! \return Pointer to a host memory for a serialized network -//! -IHostMemory* modelToSerialized(const ModelOptions& model, - const BuildOptions& build, - const SystemOptions& sys, std::ostream& err); - -//! -//! \brief Serialize network and save it into a file -//! -//! \return boolean Return true if the network was successfully serialized and -//! saved -//! -bool serializeAndSave(const ModelOptions& model, const BuildOptions& build, - const SystemOptions& sys, std::ostream& err); - -bool timeRefit(const INetworkDefinition& network, nvinfer1::ICudaEngine& engine, - bool multiThreading); - -//! -//! \brief Set tensor scales from a calibration table -//! -void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, - const std::vector& inputFormats, - const std::vector& outputFormats, - const std::string& calibrationFile); - -//! -//! \brief Check if safe runtime is loaded. -//! -bool hasSafeRuntime(); - -//! -//! \brief Create a safe runtime object if the dynamic library is loaded. -//! -nvinfer1::safe::IRuntime* -createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept; - -//! -//! \brief Check if consistency checker is loaded. -//! -bool hasConsistencyChecker(); - -//! -//! \brief Create a consistency checker object if the dynamic library is loaded. -//! -nvinfer1::consistency::IConsistencyChecker* -createConsistencyChecker(nvinfer1::ILogger& logger, - IHostMemory const* engine) noexcept; - -//! -//! \brief Run consistency check on serialized engine. -//! -bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize); -} // namespace sample - -#endif // TRT_SAMPLE_ENGINES_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp deleted file mode 100644 index fd7e9f82f..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.cpp +++ /dev/null @@ -1,943 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(__QNX__) -#include -#include -#endif - -#include "NvInfer.h" - -#include "ErrorRecorder.h" -#include "logger.h" -#include "sampleDevice.h" -#include "sampleEngines.h" -#include "sampleInference.h" -#include "sampleOptions.h" -#include "sampleReporting.h" -#include "sampleUtils.h" - -namespace sample { - -template -bool validateTensorNames(const MapType& map, const EngineType* engine, - const int32_t endBindingIndex) { - // Check if the provided input tensor names match the input tensors of the - // engine. - // Throw an error if the provided input tensor names cannot be found because - // it implies a potential typo. - for (const auto& item : map) { - bool tensorNameFound{false}; - for (int32_t b = 0; b < endBindingIndex; ++b) { - if (engine->bindingIsInput(b) && - engine->getBindingName(b) == item.first) { - tensorNameFound = true; - break; - } - } - if (!tensorNameFound) { - sample::gLogError - << "Cannot find input tensor with name \"" << item.first - << "\" in the engine bindings! " - << "Please make sure the input tensor names are correct." - << std::endl; - return false; - } - } - return true; -} - -template class FillBindingClosure { - private: - using InputsMap = std::unordered_map; - using BindingsVector = std::vector>; - - EngineType const* engine; - ContextType const* context; - InputsMap const& inputs; - BindingsVector& bindings; - int32_t batch; - int32_t endBindingIndex; - - void fillOneBinding(int32_t bindingIndex, int64_t vol) { - auto const dims = getDims(bindingIndex); - auto const name = engine->getBindingName(bindingIndex); - auto const isInput = engine->bindingIsInput(bindingIndex); - auto const dataType = engine->getBindingDataType(bindingIndex); - auto const* bindingInOutStr = isInput ? "input" : "output"; - for (auto& binding : bindings) { - const auto input = inputs.find(name); - if (isInput && input != inputs.end()) { - sample::gLogInfo << "Using values loaded from " << input->second - << " for input " << name << std::endl; - binding->addBinding(bindingIndex, name, isInput, vol, dataType, - input->second); - } else { - sample::gLogInfo << "Using random values for " << bindingInOutStr << " " - << name << std::endl; - binding->addBinding(bindingIndex, name, isInput, vol, dataType); - } - sample::gLogInfo << "Created " << bindingInOutStr << " binding for " - << name << " with dimensions " << dims << std::endl; - } - } - - bool fillAllBindings(int32_t batch, int32_t endBindingIndex) { - if (!validateTensorNames(inputs, engine, endBindingIndex)) { - sample::gLogError << "Invalid tensor names found in --loadInputs flag." - << std::endl; - return false; - } - - for (int32_t b = 0; b < endBindingIndex; b++) { - auto const dims = getDims(b); - auto const comps = engine->getBindingComponentsPerElement(b); - auto const strides = context->getStrides(b); - int32_t const vectorDimIndex = engine->getBindingVectorizedDim(b); - auto const vol = volume(dims, strides, vectorDimIndex, comps, batch); - fillOneBinding(b, vol); - } - return true; - } - - Dims getDims(int32_t bindingIndex); - - public: - FillBindingClosure(EngineType const* _engine, ContextType const* _context, - InputsMap const& _inputs, BindingsVector& _bindings, - int32_t _batch, int32_t _endBindingIndex) - : engine(_engine), context(_context), inputs(_inputs), - bindings(_bindings), batch(_batch), endBindingIndex(_endBindingIndex) {} - - bool operator()() { return fillAllBindings(batch, endBindingIndex); } -}; - -template <> -Dims FillBindingClosure:: - getDims(int32_t bindingIndex) { - return context->getBindingDimensions(bindingIndex); -} - -template <> -Dims FillBindingClosure< - nvinfer1::safe::ICudaEngine, - nvinfer1::safe::IExecutionContext>::getDims(int32_t bindingIndex) { - return engine->getBindingDimensions(bindingIndex); -} - -bool setUpInference(InferenceEnvironment& iEnv, - const InferenceOptions& inference) { - int32_t device{}; - cudaCheck(cudaGetDevice(&device)); - - cudaDeviceProp properties; - cudaCheck(cudaGetDeviceProperties(&properties, device)); - // Use managed memory on integrated devices when transfers are skipped - // and when it is explicitly requested on the commandline. - bool useManagedMemory{(inference.skipTransfers && properties.integrated) || - inference.useManaged}; - using FillSafeBindings = - FillBindingClosure; - if (iEnv.safe) { - ASSERT(sample::hasSafeRuntime()); - auto* safeEngine = iEnv.safeEngine.get(); - for (int32_t s = 0; s < inference.streams; ++s) { - iEnv.safeContext.emplace_back(safeEngine->createExecutionContext()); - iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); - } - const int32_t nBindings = safeEngine->getNbBindings(); - auto const* safeContext = iEnv.safeContext.front().get(); - // batch is set to 1 because safety only support explicit batch. - return FillSafeBindings(iEnv.safeEngine.get(), safeContext, - inference.inputs, iEnv.bindings, 1, nBindings)(); - } - - using FillStdBindings = - FillBindingClosure; - - for (int32_t s = 0; s < inference.streams; ++s) { - auto ec = iEnv.engine->createExecutionContext(); - if (ec == nullptr) { - sample::gLogError << "Unable to create execution context for stream " << s - << "." << std::endl; - return false; - } - iEnv.context.emplace_back(ec); - iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); - } - if (iEnv.profiler) { - iEnv.context.front()->setProfiler(iEnv.profiler.get()); - // Always run reportToProfiler() after enqueue launch - iEnv.context.front()->setEnqueueEmitsProfile(false); - } - - const int32_t nOptProfiles = iEnv.engine->getNbOptimizationProfiles(); - const int32_t nBindings = iEnv.engine->getNbBindings(); - const int32_t bindingsInProfile = - nOptProfiles > 0 ? nBindings / nOptProfiles : 0; - const int32_t endBindingIndex = - bindingsInProfile ? bindingsInProfile : iEnv.engine->getNbBindings(); - - if (nOptProfiles > 1) { - sample::gLogWarning << "Multiple profiles are currently not supported. " - "Running with one profile." - << std::endl; - } - - // Make sure that the tensor names provided in command-line args actually - // exist in any of the engine bindings - // to avoid silent typos. - if (!validateTensorNames(inference.shapes, iEnv.engine.get(), - endBindingIndex)) { - sample::gLogError << "Invalid tensor names found in --shapes flag." - << std::endl; - return false; - } - - // Set all input dimensions before all bindings can be allocated - for (int32_t b = 0; b < endBindingIndex; ++b) { - if (iEnv.engine->bindingIsInput(b)) { - auto dims = iEnv.context.front()->getBindingDimensions(b); - const bool isScalar = dims.nbDims == 0; - const bool isDynamicInput = - std::any_of(dims.d, dims.d + dims.nbDims, - [](int32_t dim) { return dim == -1; }) || - iEnv.engine->isShapeBinding(b); - if (isDynamicInput) { - auto shape = inference.shapes.find(iEnv.engine->getBindingName(b)); - - std::vector staticDims; - if (shape == inference.shapes.end()) { - // If no shape is provided, set dynamic dimensions to 1. - constexpr int32_t DEFAULT_DIMENSION = 1; - if (iEnv.engine->isShapeBinding(b)) { - if (isScalar) { - staticDims.push_back(1); - } else { - staticDims.resize(dims.d[0]); - std::fill(staticDims.begin(), staticDims.end(), - DEFAULT_DIMENSION); - } - } else { - staticDims.resize(dims.nbDims); - std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), - [&](int32_t dimension) { - return dimension >= 0 ? dimension - : DEFAULT_DIMENSION; - }); - } - sample::gLogWarning << "Dynamic dimensions required for input: " - << iEnv.engine->getBindingName(b) - << ", but no shapes were provided. Automatically " - "overriding shape to: " - << staticDims << std::endl; - } else if (inference.inputs.count(shape->first) && - iEnv.engine->isShapeBinding(b)) { - if (isScalar || dims.nbDims == 1) { - // Load shape tensor from file. - size_t const size = isScalar ? 1 : dims.d[0]; - staticDims.resize(size); - auto const& filename = inference.inputs.at(shape->first); - auto dst = reinterpret_cast(staticDims.data()); - loadFromFile(filename, dst, - size * sizeof(decltype(staticDims)::value_type)); - } else { - sample::gLogWarning << "Cannot load shape tensor " << shape->first - << " from file, " - << "ND-Shape isn't supported yet" << std::endl; - // Fallback - staticDims = shape->second; - } - } else { - staticDims = shape->second; - } - - for (auto& c : iEnv.context) { - if (iEnv.engine->isShapeBinding(b)) { - if (!c->setInputShapeBinding(b, staticDims.data())) { - return false; - } - } else { - if (!c->setBindingDimensions(b, toDims(staticDims))) { - return false; - } - } - } - } - } - } - - auto* engine = iEnv.engine.get(); - auto const* context = iEnv.context.front().get(); - int32_t const batch = - engine->hasImplicitBatchDimension() ? inference.batch : 1; - return FillStdBindings(engine, context, inference.inputs, iEnv.bindings, - batch, endBindingIndex)(); -} - -namespace { - -#if defined(__QNX__) -using TimePoint = double; -#else -using TimePoint = std::chrono::time_point; -#endif - -TimePoint getCurrentTime() { -#if defined(__QNX__) - uint64_t const currentCycles = ClockCycles(); - uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec; - // Return current timestamp in ms. - return static_cast(currentCycles) * 1000. / cyclesPerSecond; -#else - return std::chrono::high_resolution_clock::now(); -#endif -} - -//! -//! \struct SyncStruct -//! \brief Threads synchronization structure -//! -struct SyncStruct { - std::mutex mutex; - TrtCudaStream mainStream; - TrtCudaEvent gpuStart{cudaEventBlockingSync}; - TimePoint cpuStart{}; - float sleep{}; -}; - -struct Enqueue { - explicit Enqueue(nvinfer1::IExecutionContext& context, void** buffers) - : mContext(context), mBuffers(buffers) {} - - nvinfer1::IExecutionContext& mContext; - void** mBuffers{}; -}; - -//! -//! \class EnqueueImplicit -//! \brief Functor to enqueue inference with implict batch -//! -class EnqueueImplicit : private Enqueue { - public: - explicit EnqueueImplicit(nvinfer1::IExecutionContext& context, void** buffers, - int32_t batch) - : Enqueue(context, buffers), mBatch(batch) {} - - bool operator()(TrtCudaStream& stream) const { - if (mContext.enqueue(mBatch, mBuffers, stream.get(), nullptr)) { - // Collecting layer timing info from current profile index of execution - // context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && - !mContext.reportToProfiler()) { - gLogWarning - << "Failed to collect layer timing info from previous enqueue()" - << std::endl; - } - return true; - } - return false; - } - - private: - int32_t mBatch; -}; - -//! -//! \class EnqueueExplicit -//! \brief Functor to enqueue inference with explict batch -//! -class EnqueueExplicit : private Enqueue { - public: - explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, void** buffers) - : Enqueue(context, buffers) {} - - bool operator()(TrtCudaStream& stream) const { - if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) { - // Collecting layer timing info from current profile index of execution - // context - if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && - !mContext.reportToProfiler()) { - gLogWarning - << "Failed to collect layer timing info from previous enqueueV2()" - << std::endl; - } - return true; - } - return false; - } -}; - -//! -//! \class EnqueueGraph -//! \brief Functor to enqueue inference from CUDA Graph -//! -class EnqueueGraph { - public: - explicit EnqueueGraph(nvinfer1::IExecutionContext& context, - TrtCudaGraph& graph) - : mGraph(graph), mContext(context) {} - - bool operator()(TrtCudaStream& stream) const { - if (mGraph.launch(stream)) { - // Collecting layer timing info from current profile index of execution - // context - if (mContext.getProfiler() && !mContext.reportToProfiler()) { - gLogWarning << "Failed to collect layer timing info from previous CUDA " - "graph launch" - << std::endl; - } - return true; - } - return false; - } - - TrtCudaGraph& mGraph; - nvinfer1::IExecutionContext& mContext; -}; - -//! -//! \class EnqueueSafe -//! \brief Functor to enqueue safe execution context -//! -class EnqueueSafe { - public: - explicit EnqueueSafe(nvinfer1::safe::IExecutionContext& context, - void** buffers) - : mContext(context), mBuffers(buffers) {} - - bool operator()(TrtCudaStream& stream) const { - if (mContext.enqueueV2(mBuffers, stream.get(), nullptr)) { - return true; - } - return false; - } - - nvinfer1::safe::IExecutionContext& mContext; - void** mBuffers{}; -}; - -using EnqueueFunction = std::function; - -enum class StreamType : int32_t { - kINPUT = 0, - kCOMPUTE = 1, - kOUTPUT = 2, - kNUM = 3 -}; - -enum class EventType : int32_t { - kINPUT_S = 0, - kINPUT_E = 1, - kCOMPUTE_S = 2, - kCOMPUTE_E = 3, - kOUTPUT_S = 4, - kOUTPUT_E = 5, - kNUM = 6 -}; - -using MultiStream = - std::array(StreamType::kNUM)>; - -using MultiEvent = std::array, - static_cast(EventType::kNUM)>; - -using EnqueueTimes = std::array; - -//! -//! \class Iteration -//! \brief Inference iteration and streams management -//! -template class Iteration { - public: - Iteration(int32_t id, const InferenceOptions& inference, ContextType& context, - Bindings& bindings) - : mBindings(bindings), mStreamId(id), mDepth(1 + inference.overlap), - mActive(mDepth), mEvents(mDepth), mEnqueueTimes(mDepth), - mContext(&context) { - for (int32_t d = 0; d < mDepth; ++d) { - for (int32_t e = 0; e < static_cast(EventType::kNUM); ++e) { - mEvents[d][e].reset(new TrtCudaEvent(!inference.spin)); - } - } - createEnqueueFunction(inference, context, bindings); - } - - bool query(bool skipTransfers) { - if (mActive[mNext]) { - return true; - } - - if (!skipTransfers) { - record(EventType::kINPUT_S, StreamType::kINPUT); - mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); - record(EventType::kINPUT_E, StreamType::kINPUT); - wait(EventType::kINPUT_E, - StreamType::kCOMPUTE); // Wait for input DMA before compute - } - - record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE); - recordEnqueueTime(); - if (!mEnqueue(getStream(StreamType::kCOMPUTE))) { - return false; - } - recordEnqueueTime(); - record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE); - - if (!skipTransfers) { - wait(EventType::kCOMPUTE_E, - StreamType::kOUTPUT); // Wait for compute before output DMA - record(EventType::kOUTPUT_S, StreamType::kOUTPUT); - mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); - record(EventType::kOUTPUT_E, StreamType::kOUTPUT); - } - - mActive[mNext] = true; - moveNext(); - return true; - } - - float sync(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, - std::vector& trace, bool skipTransfers) { - if (mActive[mNext]) { - if (skipTransfers) { - getEvent(EventType::kCOMPUTE_E).synchronize(); - } else { - getEvent(EventType::kOUTPUT_E).synchronize(); - } - trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers)); - mActive[mNext] = false; - return getEvent(EventType::kCOMPUTE_S) - gpuStart; - } - return 0; - } - - void syncAll(const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, - std::vector& trace, bool skipTransfers) { - for (int32_t d = 0; d < mDepth; ++d) { - sync(cpuStart, gpuStart, trace, skipTransfers); - moveNext(); - } - } - - void wait(TrtCudaEvent& gpuStart) { - getStream(StreamType::kINPUT).wait(gpuStart); - } - - void setInputData() { - mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); - } - - void fetchOutputData() { - mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); - } - - private: - void moveNext() { mNext = mDepth - 1 - mNext; } - - TrtCudaStream& getStream(StreamType t) { - return mStream[static_cast(t)]; - } - - TrtCudaEvent& getEvent(EventType t) { - return *mEvents[mNext][static_cast(t)]; - } - - void record(EventType e, StreamType s) { getEvent(e).record(getStream(s)); } - - void recordEnqueueTime() { - mEnqueueTimes[mNext][enqueueStart] = getCurrentTime(); - enqueueStart = 1 - enqueueStart; - } - - TimePoint getEnqueueTime(bool start) { - return mEnqueueTimes[mNext][start ? 0 : 1]; - } - - void wait(EventType e, StreamType s) { getStream(s).wait(getEvent(e)); } - - InferenceTrace getTrace(const TimePoint& cpuStart, - const TrtCudaEvent& gpuStart, bool skipTransfers) { - float is = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart - : getEvent(EventType::kINPUT_S) - gpuStart; - float ie = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart - : getEvent(EventType::kINPUT_E) - gpuStart; - float os = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart - : getEvent(EventType::kOUTPUT_S) - gpuStart; - float oe = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart - : getEvent(EventType::kOUTPUT_E) - gpuStart; - - return InferenceTrace(mStreamId, - std::chrono::duration( - getEnqueueTime(true) - cpuStart) - .count(), - std::chrono::duration( - getEnqueueTime(false) - cpuStart) - .count(), - is, ie, getEvent(EventType::kCOMPUTE_S) - gpuStart, - getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe); - } - - void createEnqueueFunction(const InferenceOptions& inference, - nvinfer1::IExecutionContext& context, - Bindings& bindings) { - if (inference.batch) { - mEnqueue = EnqueueFunction(EnqueueImplicit( - context, mBindings.getDeviceBuffers(), inference.batch)); - } else { - mEnqueue = EnqueueFunction( - EnqueueExplicit(context, mBindings.getDeviceBuffers())); - } - if (inference.graph) { - TrtCudaStream& stream = getStream(StreamType::kCOMPUTE); - // Avoid capturing initialization calls by executing the enqueue function - // at least - // once before starting CUDA graph capture. - const auto ret = mEnqueue(stream); - assert(ret); - stream.synchronize(); - - mGraph.beginCapture(stream); - // The built TRT engine may contain operations that are not permitted - // under CUDA graph capture mode. - // When the stream is capturing, the enqueue call may return false if the - // current CUDA graph capture fails. - if (mEnqueue(stream)) { - mGraph.endCapture(stream); - mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph)); - } else { - mGraph.endCaptureOnError(stream); - // Ensure any CUDA error has been cleaned up. - cudaCheck(cudaGetLastError()); - sample::gLogWarning << "The built TensorRT engine contains operations " - "that are not permitted under " - "CUDA graph capture mode." - << std::endl; - sample::gLogWarning << "The specified --useCudaGraph flag has been " - "ignored. The inference will be " - "launched without using CUDA graph launch." - << std::endl; - } - } - } - - void createEnqueueFunction(const InferenceOptions&, - nvinfer1::safe::IExecutionContext& context, - Bindings&) { - mEnqueue = - EnqueueFunction(EnqueueSafe(context, mBindings.getDeviceBuffers())); - } - - Bindings& mBindings; - - TrtCudaGraph mGraph; - EnqueueFunction mEnqueue; - - int32_t mStreamId{0}; - int32_t mNext{0}; - int32_t mDepth{2}; // default to double buffer to hide DMA transfers - - std::vector mActive; - MultiStream mStream; - std::vector mEvents; - - int32_t enqueueStart{0}; - std::vector mEnqueueTimes; - ContextType* mContext{nullptr}; -}; - -template -bool inferenceLoop( - std::vector>>& iStreams, - const TimePoint& cpuStart, const TrtCudaEvent& gpuStart, int iterations, - float maxDurationMs, float warmupMs, std::vector& trace, - bool skipTransfers, float idleMs) { - float durationMs = 0; - int32_t skip = 0; - - for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs; - ++i) { - for (auto& s : iStreams) { - if (!s->query(skipTransfers)) { - return false; - } - } - for (auto& s : iStreams) { - durationMs = std::max(durationMs, - s->sync(cpuStart, gpuStart, trace, skipTransfers)); - } - if (durationMs < warmupMs) // Warming up - { - if (durationMs) // Skip complete iterations - { - ++skip; - } - continue; - } - if (idleMs != 0.F) { - std::this_thread::sleep_for( - std::chrono::duration(idleMs)); - } - } - for (auto& s : iStreams) { - s->syncAll(cpuStart, gpuStart, trace, skipTransfers); - } - return true; -} - -template -void inferenceExecution(const InferenceOptions& inference, - InferenceEnvironment& iEnv, SyncStruct& sync, - const int32_t threadIdx, const int32_t streamsPerThread, - int32_t device, std::vector& trace) { - float warmupMs = inference.warmup; - float durationMs = inference.duration * 1000.F + warmupMs; - - cudaCheck(cudaSetDevice(device)); - - std::vector>> iStreams; - - for (int32_t s = 0; s < streamsPerThread; ++s) { - const int32_t streamId{threadIdx * streamsPerThread + s}; - auto* iteration = new Iteration( - streamId, inference, *iEnv.template getContext(streamId), - *iEnv.bindings[streamId]); - if (inference.skipTransfers) { - iteration->setInputData(); - } - iStreams.emplace_back(iteration); - } - - for (auto& s : iStreams) { - s->wait(sync.gpuStart); - } - - std::vector localTrace; - if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, - inference.iterations, durationMs, warmupMs, localTrace, - inference.skipTransfers, inference.idle)) { - iEnv.error = true; - } - - if (inference.skipTransfers) { - for (auto& s : iStreams) { - s->fetchOutputData(); - } - } - - sync.mutex.lock(); - trace.insert(trace.end(), localTrace.begin(), localTrace.end()); - sync.mutex.unlock(); -} - -inline std::thread makeThread(const InferenceOptions& inference, - InferenceEnvironment& iEnv, SyncStruct& sync, - int32_t threadIdx, int32_t streamsPerThread, - int32_t device, - std::vector& trace) { - if (iEnv.safe) { - ASSERT(sample::hasSafeRuntime()); - return std::thread(inferenceExecution, - std::cref(inference), std::ref(iEnv), std::ref(sync), - threadIdx, streamsPerThread, device, std::ref(trace)); - } - - return std::thread(inferenceExecution, - std::cref(inference), std::ref(iEnv), std::ref(sync), - threadIdx, streamsPerThread, device, std::ref(trace)); -} - -} // namespace - -bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv, - int32_t device, std::vector& trace) { - cudaCheck(cudaProfilerStart()); - - trace.resize(0); - - SyncStruct sync; - sync.sleep = inference.sleep; - sync.mainStream.sleep(&sync.sleep); - sync.cpuStart = getCurrentTime(); - sync.gpuStart.record(sync.mainStream); - - // When multiple streams are used, trtexec can run inference in two modes: - // (1) if inference.threads is true, then run each stream on each thread. - // (2) if inference.threads is false, then run all streams on the same thread. - const int32_t numThreads = inference.threads ? inference.streams : 1; - const int32_t streamsPerThread = inference.threads ? 1 : inference.streams; - - std::vector threads; - for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) { - threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx, - streamsPerThread, device, trace)); - } - for (auto& th : threads) { - th.join(); - } - - cudaCheck(cudaProfilerStop()); - - auto cmpTrace = [](const InferenceTrace& a, const InferenceTrace& b) { - return a.h2dStart < b.h2dStart; - }; - std::sort(trace.begin(), trace.end(), cmpTrace); - - return !iEnv.error; -} - -namespace { -size_t reportGpuMemory() { - static size_t prevFree{0}; - size_t free{0}; - size_t total{0}; - size_t newlyAllocated{0}; - cudaCheck(cudaMemGetInfo(&free, &total)); - sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB"; - if (prevFree != 0) { - newlyAllocated = (prevFree - free); - sample::gLogInfo << ", newly allocated GPU memory = " - << newlyAllocated / 1024.0_MiB << " GiB"; - } - sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB" - << std::endl; - prevFree = free; - return newlyAllocated; -} -} // namespace - -//! Returns true if deserialization is slower than expected or fails. -bool timeDeserialize(InferenceEnvironment& iEnv) { - constexpr int32_t kNB_ITERS{20}; - std::unique_ptr rt{ - createInferRuntime(sample::gLogger.getTRTLogger())}; - std::unique_ptr engine; - - std::unique_ptr safeRT{ - sample::createSafeInferRuntime(sample::gLogger.getTRTLogger())}; - std::unique_ptr safeEngine; - - if (iEnv.safe) { - ASSERT(sample::hasSafeRuntime() && safeRT != nullptr); - safeRT->setErrorRecorder(&gRecorder); - } - - auto timeDeserializeFn = [&]() -> float { - bool deserializeOK{false}; - engine.reset(nullptr); - safeEngine.reset(nullptr); - auto startClock = std::chrono::high_resolution_clock::now(); - if (iEnv.safe) { - safeEngine.reset(safeRT->deserializeCudaEngine(iEnv.engineBlob.data(), - iEnv.engineBlob.size())); - deserializeOK = (safeEngine != nullptr); - } else { - engine.reset(rt->deserializeCudaEngine(iEnv.engineBlob.data(), - iEnv.engineBlob.size(), nullptr)); - deserializeOK = (engine != nullptr); - } - auto endClock = std::chrono::high_resolution_clock::now(); - // return NAN if deserialization failed. - return deserializeOK - ? std::chrono::duration(endClock - startClock) - .count() - : NAN; - }; - - // Warmup the caches to make sure that cache thrashing isn't throwing off the - // results - { - sample::gLogInfo << "Begin deserialization warmup..." << std::endl; - for (int32_t i = 0, e = 2; i < e; ++i) { - timeDeserializeFn(); - } - } - sample::gLogInfo << "Begin deserialization engine timing..." << std::endl; - float const first = timeDeserializeFn(); - - // Check if first deserialization suceeded. - if (std::isnan(first)) { - sample::gLogError << "Engine deserialization failed." << std::endl; - return true; - } - - sample::gLogInfo << "First deserialization time = " << first - << " milliseconds" << std::endl; - - // Record initial gpu memory state. - reportGpuMemory(); - - float totalTime{0.F}; - for (int32_t i = 0; i < kNB_ITERS; ++i) { - totalTime += timeDeserializeFn(); - } - const auto averageTime = totalTime / kNB_ITERS; - // reportGpuMemory sometimes reports zero after a single deserialization of a - // small engine, - // so use the size of memory for all the iterations. - const auto totalEngineSizeGpu = reportGpuMemory(); - sample::gLogInfo << "Total deserialization time = " << totalTime - << " milliseconds in " << kNB_ITERS - << " iterations, average time = " << averageTime - << " milliseconds, first time = " << first - << " milliseconds." << std::endl; - sample::gLogInfo << "Deserialization Bandwidth = " - << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s" - << std::endl; - - // If the first deserialization is more than tolerance slower than - // the average deserialization, return true, which means an error occurred. - // The tolerance is set to 2x since the deserialization time is quick and - // susceptible - // to caching issues causing problems in the first timing. - const auto tolerance = 2.0F; - const bool isSlowerThanExpected = first > averageTime * tolerance; - if (isSlowerThanExpected) { - sample::gLogInfo << "First deserialization time divided by average time is " - << (first / averageTime) << ". Exceeds tolerance of " - << tolerance << "x." << std::endl; - } - return isSlowerThanExpected; -} - -std::string getLayerInformation(const InferenceEnvironment& iEnv, - nvinfer1::LayerInformationFormat format) { - auto runtime = std::unique_ptr( - createInferRuntime(sample::gLogger.getTRTLogger())); - auto inspector = - std::unique_ptr(iEnv.engine->createEngineInspector()); - if (!iEnv.context.empty()) { - inspector->setExecutionContext(iEnv.context.front().get()); - } - std::string result = inspector->getEngineInformation(format); - return result; -} - -} // namespace sample diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h deleted file mode 100644 index 700dc8bef..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleInference.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_INFERENCE_H -#define TRT_SAMPLE_INFERENCE_H - -#include "sampleReporting.h" -#include "sampleUtils.h" - -#include -#include -#include -#include - -#include "NvInfer.h" -#include "NvInferSafeRuntime.h" - -namespace sample { - -struct InferenceEnvironment { - TrtUniquePtr engine; - std::unique_ptr profiler; - std::vector> context; - std::vector> bindings; - bool error{false}; - - std::vector engineBlob; - - bool safe{false}; - std::unique_ptr safeEngine; - std::vector> safeContext; - - template - inline ContextType* getContext(int32_t streamIdx); -}; - -template <> -inline nvinfer1::IExecutionContext* -InferenceEnvironment::getContext(int32_t streamIdx) { - return context[streamIdx].get(); -} - -template <> -inline nvinfer1::safe::IExecutionContext* -InferenceEnvironment::getContext(int32_t streamIdx) { - return safeContext[streamIdx].get(); -} - -//! -//! \brief Set up contexts and bindings for inference -//! -bool setUpInference(InferenceEnvironment& iEnv, - const InferenceOptions& inference); - -//! -//! \brief Deserialize the engine and time how long it takes. -//! -bool timeDeserialize(InferenceEnvironment& iEnv); - -//! -//! \brief Run inference and collect timing, return false if any error hit -//! during inference -//! -bool runInference(const InferenceOptions& inference, InferenceEnvironment& iEnv, - int32_t device, std::vector& trace); - -//! -//! \brief Get layer information of the engine. -//! -std::string getLayerInformation(const InferenceEnvironment& iEnv, - nvinfer1::LayerInformationFormat format); - -} // namespace sample - -#endif // TRT_SAMPLE_INFERENCE_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp deleted file mode 100644 index a01b4dfde..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.cpp +++ /dev/null @@ -1,1634 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" - -#include "logger.h" -#include "sampleOptions.h" - -namespace sample { - -namespace { - -std::vector splitToStringVec(const std::string& option, - char separator) { - std::vector options; - - for (size_t start = 0; start < option.length();) { - size_t separatorIndex = option.find(separator, start); - if (separatorIndex == std::string::npos) { - separatorIndex = option.length(); - } - options.emplace_back(option.substr(start, separatorIndex - start)); - start = separatorIndex + 1; - } - - return options; -} - -template T stringToValue(const std::string& option) { - return T{option}; -} - -template <> int32_t stringToValue(const std::string& option) { - return std::stoi(option); -} - -template <> float stringToValue(const std::string& option) { - return std::stof(option); -} - -template <> double stringToValue(const std::string& option) { - return std::stod(option); -} - -template <> bool stringToValue(const std::string& option) { return true; } - -template <> -std::vector -stringToValue>(const std::string& option) { - std::vector shape; - std::vector dimsStrings = splitToStringVec(option, 'x'); - for (const auto& d : dimsStrings) { - shape.push_back(stringToValue(d)); - } - return shape; -} - -template <> -nvinfer1::DataType -stringToValue(const std::string& option) { - const std::unordered_map strToDT{ - {"fp32", nvinfer1::DataType::kFLOAT}, - {"fp16", nvinfer1::DataType::kHALF}, - {"int8", nvinfer1::DataType::kINT8}, - {"int32", nvinfer1::DataType::kINT32}}; - const auto& dt = strToDT.find(option); - if (dt == strToDT.end()) { - throw std::invalid_argument("Invalid DataType " + option); - } - return dt->second; -} - -template <> -nvinfer1::TensorFormats -stringToValue(const std::string& option) { - std::vector optionStrings = splitToStringVec(option, '+'); - const std::unordered_map strToFmt{ - {"chw", nvinfer1::TensorFormat::kLINEAR}, - {"chw2", nvinfer1::TensorFormat::kCHW2}, - {"chw4", nvinfer1::TensorFormat::kCHW4}, - {"hwc8", nvinfer1::TensorFormat::kHWC8}, - {"chw16", nvinfer1::TensorFormat::kCHW16}, - {"chw32", nvinfer1::TensorFormat::kCHW32}, - {"dhwc8", nvinfer1::TensorFormat::kDHWC8}, - {"hwc", nvinfer1::TensorFormat::kHWC}, - {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR}, - {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}}; - nvinfer1::TensorFormats formats{}; - for (auto f : optionStrings) { - const auto& tf = strToFmt.find(f); - if (tf == strToFmt.end()) { - throw std::invalid_argument(std::string("Invalid TensorFormat ") + f); - } - formats |= 1U << static_cast(tf->second); - } - - return formats; -} - -template <> IOFormat stringToValue(const std::string& option) { - IOFormat ioFormat{}; - const size_t colon = option.find(':'); - - if (colon == std::string::npos) { - throw std::invalid_argument(std::string("Invalid IOFormat ") + option); - } - - ioFormat.first = stringToValue(option.substr(0, colon)); - ioFormat.second = - stringToValue(option.substr(colon + 1)); - - return ioFormat; -} - -template -std::pair splitNameAndValue(const std::string& s) { - std::string tensorName; - std::string valueString; - // Split on the last : - std::vector nameRange{splitToStringVec(s, ':')}; - // Everything before the last : is the name - tensorName = nameRange[0]; - for (size_t i = 1; i < nameRange.size() - 1; i++) { - tensorName += ":" + nameRange[i]; - } - // Value is the string element after the last : - valueString = nameRange[nameRange.size() - 1]; - return std::pair(tensorName, stringToValue(valueString)); -} - -template -void splitInsertKeyValue(const std::vector& kvList, T& map) { - for (const auto& kv : kvList) { - map.insert(splitNameAndValue(kv)); - } -} - -const char* boolToEnabled(bool enable) { - return enable ? "Enabled" : "Disabled"; -} - -//! Check if input option exists in input arguments. -//! If it does: return its value, erase the argument and return true. -//! If it does not: return false. -template -bool getAndDelOption(Arguments& arguments, const std::string& option, - T& value) { - const auto match = arguments.find(option); - if (match != arguments.end()) { - value = stringToValue(match->second); - arguments.erase(match); - return true; - } - - return false; -} - -//! Check if input option exists in input arguments. -//! If it does: return false in value, erase the argument and return true. -//! If it does not: return false. -bool getAndDelNegOption(Arguments& arguments, const std::string& option, - bool& value) { - bool dummy; - if (getAndDelOption(arguments, option, dummy)) { - value = false; - return true; - } - return false; -} - -//! Check if input option exists in input arguments. -//! If it does: add all the matched arg values to values vector, erase the -//! argument and return true. -//! If it does not: return false. -template -bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, - std::vector& values) { - const auto match = arguments.equal_range(option); - if (match.first == match.second) { - return false; - } - - auto addToValues = [&values](Arguments::value_type& argValue) { - values.emplace_back(stringToValue(argValue.second)); - }; - std::for_each(match.first, match.second, addToValues); - arguments.erase(match.first, match.second); - - return true; -} - -void insertShapesBuild(std::unordered_map& shapes, - nvinfer1::OptProfileSelector selector, - const std::string& name, - const std::vector& dims) { - shapes[name][static_cast(selector)] = dims; -} - -void insertShapesInference( - std::unordered_map>& shapes, - const std::string& name, const std::vector& dims) { - shapes[name] = dims; -} - -std::string removeSingleQuotationMarks(std::string& str) { - std::vector strList{splitToStringVec(str, '\'')}; - // Remove all the escaped single quotation marks - std::string retVal = ""; - // Do not really care about unterminated sequences - for (size_t i = 0; i < strList.size(); i++) { - retVal += strList[i]; - } - return retVal; -} - -void getLayerPrecisions(Arguments& arguments, char const* argument, - LayerPrecisions& layerPrecisions) { - std::string list; - if (!getAndDelOption(arguments, argument, list)) { - return; - } - - // The layerPrecisions flag contains comma-separated layerName:precision - // pairs. - std::vector precisionList{splitToStringVec(list, ',')}; - for (auto const& s : precisionList) { - auto namePrecisionPair = splitNameAndValue(s); - auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); - layerPrecisions[layerName] = namePrecisionPair.second; - } -} - -void getLayerOutputTypes(Arguments& arguments, char const* argument, - LayerOutputTypes& layerOutputTypes) { - std::string list; - if (!getAndDelOption(arguments, argument, list)) { - return; - } - - // The layerOutputTypes flag contains comma-separated layerName:types pairs. - std::vector precisionList{splitToStringVec(list, ',')}; - for (auto const& s : precisionList) { - auto namePrecisionPair = splitNameAndValue(s); - auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); - auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+'); - std::vector typeVec(typeStrings.size(), - nvinfer1::DataType::kFLOAT); - std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(), - stringToValue); - layerOutputTypes[layerName] = typeVec; - } -} - -bool getShapesBuild(Arguments& arguments, - std::unordered_map& shapes, - char const* argument, - nvinfer1::OptProfileSelector selector) { - std::string list; - bool retVal = getAndDelOption(arguments, argument, list); - std::vector shapeList{splitToStringVec(list, ',')}; - for (const auto& s : shapeList) { - auto nameDimsPair = splitNameAndValue>(s); - auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); - auto dims = nameDimsPair.second; - insertShapesBuild(shapes, selector, tensorName, dims); - } - return retVal; -} - -bool getShapesInference( - Arguments& arguments, - std::unordered_map>& shapes, - const char* argument) { - std::string list; - bool retVal = getAndDelOption(arguments, argument, list); - std::vector shapeList{splitToStringVec(list, ',')}; - for (const auto& s : shapeList) { - auto nameDimsPair = splitNameAndValue>(s); - auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); - auto dims = nameDimsPair.second; - insertShapesInference(shapes, tensorName, dims); - } - return retVal; -} - -void processShapes(std::unordered_map& shapes, - bool minShapes, bool optShapes, bool maxShapes, bool calib) { - // Only accept optShapes only or all three of minShapes, optShapes, maxShapes - if (((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes - // only, both minShapes and - // maxShapes - || (minShapes && !maxShapes && optShapes) // both minShapes and optShapes - || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes - { - if (calib) { - throw std::invalid_argument( - "Must specify only --optShapesCalib or all of --minShapesCalib, " - "--optShapesCalib, --maxShapesCalib"); - } else { - throw std::invalid_argument( - "Must specify only --optShapes or all of --minShapes, --optShapes, " - "--maxShapes"); - } - } - - // If optShapes only, expand optShapes to minShapes and maxShapes - if (optShapes && !minShapes && !maxShapes) { - std::unordered_map newShapes; - for (auto& s : shapes) { - insertShapesBuild( - newShapes, nvinfer1::OptProfileSelector::kMIN, s.first, - s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - insertShapesBuild( - newShapes, nvinfer1::OptProfileSelector::kOPT, s.first, - s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - insertShapesBuild( - newShapes, nvinfer1::OptProfileSelector::kMAX, s.first, - s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - } - shapes = newShapes; - } -} - -template -void printShapes(std::ostream& os, const char* phase, const T& shapes) { - if (shapes.empty()) { - os << "Input " << phase << " shapes: model" << std::endl; - } else { - for (const auto& s : shapes) { - os << "Input " << phase << " shape: " << s.first << "=" << s.second - << std::endl; - } - } -} - -std::ostream& printBatch(std::ostream& os, int32_t maxBatch) { - if (maxBatch != maxBatchNotProvided) { - os << maxBatch; - } else { - os << "explicit batch"; - } - return os; -} - -std::ostream& printTacticSources(std::ostream& os, - nvinfer1::TacticSources enabledSources, - nvinfer1::TacticSources disabledSources) { - if (!enabledSources && !disabledSources) { - os << "Using default tactic sources"; - } else { - auto const addSource = [&](uint32_t source, std::string const& name) { - if (enabledSources & source) { - os << name << " [ON], "; - } else if (disabledSources & source) { - os << name << " [OFF], "; - } - }; - - addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS), - "cublas"); - addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS_LT), - "cublasLt"); - addSource(1U << static_cast(nvinfer1::TacticSource::kCUDNN), - "cudnn"); - } - return os; -} - -std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) { - os << "FP32"; - if (options.fp16) { - os << "+FP16"; - } - if (options.int8) { - os << "+INT8"; - } - if (options.precisionConstraints == PrecisionConstraints::kOBEY) { - os << " (obey precision constraints)"; - } - if (options.precisionConstraints == PrecisionConstraints::kPREFER) { - os << " (prefer precision constraints)"; - } - return os; -} - -std::ostream& printTimingCache(std::ostream& os, BuildOptions const& options) { - switch (options.timingCacheMode) { - case TimingCacheMode::kGLOBAL: - os << "global"; - break; - case TimingCacheMode::kLOCAL: - os << "local"; - break; - case TimingCacheMode::kDISABLE: - os << "disable"; - break; - } - return os; -} - -std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) { - switch (options.sparsity) { - case SparsityFlag::kDISABLE: - os << "Disabled"; - break; - case SparsityFlag::kENABLE: - os << "Enabled"; - break; - case SparsityFlag::kFORCE: - os << "Forced"; - break; - } - - return os; -} - -std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) { - auto const printValueOrDefault = [&os](double const val) { - if (val >= 0) { - os << val << " MiB"; - } else { - os << "default"; - } - }; - os << "workspace: "; - printValueOrDefault(options.workspace); - os << ", "; - os << "dlaSRAM: "; - printValueOrDefault(options.dlaSRAM); - os << ", "; - os << "dlaLocalDRAM: "; - printValueOrDefault(options.dlaLocalDRAM); - os << ", "; - os << "dlaGlobalDRAM: "; - printValueOrDefault(options.dlaGlobalDRAM); - return os; -} - -} // namespace - -Arguments argsToArgumentsMap(int32_t argc, char* argv[]) { - Arguments arguments; - for (int32_t i = 1; i < argc; ++i) { - auto valuePtr = strchr(argv[i], '='); - if (valuePtr) { - std::string value{valuePtr + 1}; - arguments.emplace(std::string(argv[i], valuePtr - argv[i]), value); - } else { - arguments.emplace(argv[i], ""); - } - } - return arguments; -} - -void BaseModelOptions::parse(Arguments& arguments) { - if (getAndDelOption(arguments, "--onnx", model)) { - format = ModelFormat::kONNX; - } else if (getAndDelOption(arguments, "--uff", model)) { - format = ModelFormat::kUFF; - } else if (getAndDelOption(arguments, "--model", model)) { - format = ModelFormat::kCAFFE; - } -} - -void UffInput::parse(Arguments& arguments) { - getAndDelOption(arguments, "--uffNHWC", NHWC); - std::vector args; - if (getAndDelRepeatedOption(arguments, "--uffInput", args)) { - for (const auto& i : args) { - std::vector values{splitToStringVec(i, ',')}; - if (values.size() == 4) { - nvinfer1::Dims3 dims{std::stoi(values[1]), std::stoi(values[2]), - std::stoi(values[3])}; - inputs.emplace_back(values[0], dims); - } else { - throw std::invalid_argument(std::string("Invalid uffInput ") + i); - } - } - } -} - -void ModelOptions::parse(Arguments& arguments) { - baseModel.parse(arguments); - - switch (baseModel.format) { - case ModelFormat::kCAFFE: { - getAndDelOption(arguments, "--deploy", prototxt); - break; - } - case ModelFormat::kUFF: { - uffInputs.parse(arguments); - if (uffInputs.inputs.empty()) { - throw std::invalid_argument("Uff models require at least one input"); - } - break; - } - case ModelFormat::kONNX: - break; - case ModelFormat::kANY: { - if (getAndDelOption(arguments, "--deploy", prototxt)) { - baseModel.format = ModelFormat::kCAFFE; - } - break; - } - } - - // The --output flag should only be used with Caffe and UFF. It has no effect - // on ONNX. - std::vector outArgs; - if (getAndDelRepeatedOption(arguments, "--output", outArgs)) { - for (const auto& o : outArgs) { - for (auto& v : splitToStringVec(o, ',')) { - outputs.emplace_back(std::move(v)); - } - } - } - if (baseModel.format == ModelFormat::kCAFFE || - baseModel.format == ModelFormat::kUFF) { - if (outputs.empty()) { - throw std::invalid_argument( - "Caffe and Uff models require at least one output"); - } - } else if (baseModel.format == ModelFormat::kONNX) { - if (!outputs.empty()) { - throw std::invalid_argument( - "The --output flag should not be used with ONNX models."); - } - } -} - -void BuildOptions::parse(Arguments& arguments) { - auto getFormats = [&arguments](std::vector& formatsVector, - const char* argument) { - std::string list; - getAndDelOption(arguments, argument, list); - std::vector formats{splitToStringVec(list, ',')}; - for (const auto& f : formats) { - formatsVector.push_back(stringToValue(f)); - } - }; - - getFormats(inputFormats, "--inputIOFormats"); - getFormats(outputFormats, "--outputIOFormats"); - - bool addedExplicitBatchFlag{false}; - getAndDelOption(arguments, "--explicitBatch", addedExplicitBatchFlag); - if (addedExplicitBatchFlag) { - sample::gLogWarning - << "--explicitBatch flag has been deprecated and has no effect!" - << std::endl; - sample::gLogWarning << "Explicit batch dim is automatically enabled if " - "input model is ONNX or if dynamic " - << "shapes are provided when the engine is built." - << std::endl; - } - - bool minShapes = getShapesBuild(arguments, shapes, "--minShapes", - nvinfer1::OptProfileSelector::kMIN); - bool optShapes = getShapesBuild(arguments, shapes, "--optShapes", - nvinfer1::OptProfileSelector::kOPT); - bool maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", - nvinfer1::OptProfileSelector::kMAX); - processShapes(shapes, minShapes, optShapes, maxShapes, false); - bool minShapesCalib = - getShapesBuild(arguments, shapesCalib, "--minShapesCalib", - nvinfer1::OptProfileSelector::kMIN); - bool optShapesCalib = - getShapesBuild(arguments, shapesCalib, "--optShapesCalib", - nvinfer1::OptProfileSelector::kOPT); - bool maxShapesCalib = - getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", - nvinfer1::OptProfileSelector::kMAX); - processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, - true); - - bool addedExplicitPrecisionFlag{false}; - getAndDelOption(arguments, "--explicitPrecision", addedExplicitPrecisionFlag); - if (addedExplicitPrecisionFlag) { - sample::gLogWarning - << "--explicitPrecision flag has been deprecated and has no effect!" - << std::endl; - } - - if (getAndDelOption(arguments, "--workspace", workspace)) { - sample::gLogWarning - << "--workspace flag has been deprecated by --memPoolSize flag." - << std::endl; - } - - std::string memPoolSizes; - getAndDelOption(arguments, "--memPoolSize", memPoolSizes); - std::vector memPoolSpecs{splitToStringVec(memPoolSizes, ',')}; - for (auto const& memPoolSpec : memPoolSpecs) { - std::string memPoolName; - double memPoolSize; - std::tie(memPoolName, memPoolSize) = splitNameAndValue(memPoolSpec); - if (memPoolSize < 0) { - throw std::invalid_argument(std::string("Negative memory pool size: ") + - std::to_string(memPoolSize)); - } - if (memPoolName == "workspace") { - workspace = memPoolSize; - } else if (memPoolName == "dlaSRAM") { - dlaSRAM = memPoolSize; - } else if (memPoolName == "dlaLocalDRAM") { - dlaLocalDRAM = memPoolSize; - } else if (memPoolName == "dlaGlobalDRAM") { - dlaGlobalDRAM = memPoolSize; - } else if (!memPoolName.empty()) { - throw std::invalid_argument(std::string("Unknown memory pool: ") + - memPoolName); - } - } - - getAndDelOption(arguments, "--maxBatch", maxBatch); - getAndDelOption(arguments, "--minTiming", minTiming); - getAndDelOption(arguments, "--avgTiming", avgTiming); - - bool best{false}; - getAndDelOption(arguments, "--best", best); - if (best) { - int8 = true; - fp16 = true; - } - - getAndDelOption(arguments, "--refit", refittable); - getAndDelNegOption(arguments, "--noTF32", tf32); - getAndDelOption(arguments, "--fp16", fp16); - getAndDelOption(arguments, "--int8", int8); - getAndDelOption(arguments, "--safe", safe); - getAndDelOption(arguments, "--consistency", consistency); - getAndDelOption(arguments, "--restricted", restricted); - - getAndDelOption(arguments, "--directIO", directIO); - - std::string precisionConstraintsString; - getAndDelOption(arguments, "--precisionConstraints", - precisionConstraintsString); - if (!precisionConstraintsString.empty()) { - const std::unordered_map - precisionConstraintsMap = {{"obey", PrecisionConstraints::kOBEY}, - {"prefer", PrecisionConstraints::kPREFER}, - {"none", PrecisionConstraints::kNONE}}; - auto it = precisionConstraintsMap.find(precisionConstraintsString); - if (it == precisionConstraintsMap.end()) { - throw std::invalid_argument( - std::string("Unknown precision constraints: ") + - precisionConstraintsString); - } - precisionConstraints = it->second; - } else { - precisionConstraints = PrecisionConstraints::kNONE; - } - - getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions); - getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes); - - if (layerPrecisions.empty() && layerOutputTypes.empty() && - precisionConstraints != PrecisionConstraints::kNONE) { - sample::gLogWarning << "When --precisionConstraints flag is set to " - "\"obey\" or \"prefer\", please add " - << "--layerPrecision/--layerOutputTypes flags to set " - "layer-wise precisions and output " - << "types." << std::endl; - } else if ((!layerPrecisions.empty() || !layerOutputTypes.empty()) && - precisionConstraints == PrecisionConstraints::kNONE) { - sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no " - "effect when --precisionConstraints " - << "flag is set to \"none\"." << std::endl; - } - - std::string sparsityString; - getAndDelOption(arguments, "--sparsity", sparsityString); - if (sparsityString == "disable") { - sparsity = SparsityFlag::kDISABLE; - } else if (sparsityString == "enable") { - sparsity = SparsityFlag::kENABLE; - } else if (sparsityString == "force") { - sparsity = SparsityFlag::kFORCE; - } else if (!sparsityString.empty()) { - throw std::invalid_argument(std::string("Unknown sparsity mode: ") + - sparsityString); - } - - bool calibCheck = getAndDelOption(arguments, "--calib", calibration); - if (int8 && calibCheck && !shapes.empty() && shapesCalib.empty()) { - shapesCalib = shapes; - } - - std::string profilingVerbosityString; - if (getAndDelOption(arguments, "--nvtxMode", profilingVerbosityString)) { - sample::gLogWarning - << "--nvtxMode flag has been deprecated by --profilingVerbosity flag." - << std::endl; - } - - getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString); - if (profilingVerbosityString == "layer_names_only") { - profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; - } else if (profilingVerbosityString == "none") { - profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE; - } else if (profilingVerbosityString == "detailed") { - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; - } else if (profilingVerbosityString == "default") { - sample::gLogWarning - << "--profilingVerbosity=default has been deprecated by " - "--profilingVerbosity=layer_names_only." - << std::endl; - profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; - } else if (profilingVerbosityString == "verbose") { - sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated " - "by --profilingVerbosity=detailed." - << std::endl; - profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; - } else if (!profilingVerbosityString.empty()) { - throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") + - profilingVerbosityString); - } - - if (getAndDelOption(arguments, "--loadEngine", engine)) { - load = true; - } - if (getAndDelOption(arguments, "--saveEngine", engine)) { - save = true; - } - if (load && save) { - throw std::invalid_argument( - "Incompatible load and save engine options selected"); - } - - std::string tacticSourceArgs; - if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs)) { - std::vector tacticList = - splitToStringVec(tacticSourceArgs, ','); - for (auto& t : tacticList) { - bool enable{false}; - if (t.front() == '+') { - enable = true; - } else if (t.front() != '-') { - throw std::invalid_argument( - "Tactic source must be prefixed with + or -, indicating whether it " - "should be enabled or disabled " - "respectively."); - } - t.erase(0, 1); - - const auto toUpper = [](std::string& sourceName) { - std::transform(sourceName.begin(), sourceName.end(), sourceName.begin(), - [](char c) { return std::toupper(c); }); - return sourceName; - }; - - nvinfer1::TacticSource source{}; - t = toUpper(t); - if (t == "CUBLAS") { - source = nvinfer1::TacticSource::kCUBLAS; - } else if (t == "CUBLASLT" || t == "CUBLAS_LT") { - source = nvinfer1::TacticSource::kCUBLAS_LT; - } else if (t == "CUDNN") { - source = nvinfer1::TacticSource::kCUDNN; - } else { - throw std::invalid_argument(std::string("Unknown tactic source: ") + t); - } - - uint32_t sourceBit = 1U << static_cast(source); - - if (enable) { - enabledTactics |= sourceBit; - } else { - disabledTactics |= sourceBit; - } - - if (enabledTactics & disabledTactics) { - throw std::invalid_argument(std::string("Cannot enable and disable ") + - t); - } - } - } - - bool noBuilderCache{false}; - getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); - getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); - if (noBuilderCache) { - timingCacheMode = TimingCacheMode::kDISABLE; - } else if (!timingCacheFile.empty()) { - timingCacheMode = TimingCacheMode::kGLOBAL; - } else { - timingCacheMode = TimingCacheMode::kLOCAL; - } -} - -void SystemOptions::parse(Arguments& arguments) { - getAndDelOption(arguments, "--device", device); - getAndDelOption(arguments, "--useDLACore", DLACore); - getAndDelOption(arguments, "--allowGPUFallback", fallback); - std::string pluginName; - while (getAndDelOption(arguments, "--plugins", pluginName)) { - plugins.emplace_back(pluginName); - } -} - -void InferenceOptions::parse(Arguments& arguments) { - getAndDelOption(arguments, "--streams", streams); - getAndDelOption(arguments, "--iterations", iterations); - getAndDelOption(arguments, "--duration", duration); - getAndDelOption(arguments, "--warmUp", warmup); - getAndDelOption(arguments, "--sleepTime", sleep); - getAndDelOption(arguments, "--idleTime", idle); - bool exposeDMA{false}; - if (getAndDelOption(arguments, "--exposeDMA", exposeDMA)) { - overlap = !exposeDMA; - } - getAndDelOption(arguments, "--noDataTransfers", skipTransfers); - getAndDelOption(arguments, "--useManagedMemory", useManaged); - getAndDelOption(arguments, "--useSpinWait", spin); - getAndDelOption(arguments, "--threads", threads); - getAndDelOption(arguments, "--useCudaGraph", graph); - getAndDelOption(arguments, "--separateProfileRun", rerun); - getAndDelOption(arguments, "--buildOnly", skip); - getAndDelOption(arguments, "--timeDeserialize", timeDeserialize); - getAndDelOption(arguments, "--timeRefit", timeRefit); - - std::string list; - getAndDelOption(arguments, "--loadInputs", list); - std::vector inputsList{splitToStringVec(list, ',')}; - splitInsertKeyValue(inputsList, inputs); - - getShapesInference(arguments, shapes, "--shapes"); - getAndDelOption(arguments, "--batch", batch); -} - -void ReportingOptions::parse(Arguments& arguments) { - getAndDelOption(arguments, "--percentile", percentile); - getAndDelOption(arguments, "--avgRuns", avgs); - getAndDelOption(arguments, "--verbose", verbose); - getAndDelOption(arguments, "--dumpRefit", refit); - getAndDelOption(arguments, "--dumpOutput", output); - getAndDelOption(arguments, "--dumpProfile", profile); - getAndDelOption(arguments, "--dumpLayerInfo", layerInfo); - getAndDelOption(arguments, "--exportTimes", exportTimes); - getAndDelOption(arguments, "--exportOutput", exportOutput); - getAndDelOption(arguments, "--exportProfile", exportProfile); - getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo); - if (percentile < 0 || percentile > 100) { - throw std::invalid_argument(std::string("Percentile ") + - std::to_string(percentile) + - "is not in [0,100]"); - } -} - -bool parseHelp(Arguments& arguments) { - bool helpLong{false}; - bool helpShort{false}; - getAndDelOption(arguments, "--help", helpLong); - getAndDelOption(arguments, "-h", helpShort); - return helpLong || helpShort; -} - -void AllOptions::parse(Arguments& arguments) { - model.parse(arguments); - build.parse(arguments); - system.parse(arguments); - inference.parse(arguments); - - // Use explicitBatch when input model is ONNX or when dynamic shapes are used. - const bool isOnnx{model.baseModel.format == ModelFormat::kONNX}; - const bool hasDynamicShapes{!build.shapes.empty() || - !inference.shapes.empty()}; - const bool detectedExplicitBatch = isOnnx || hasDynamicShapes; - - // Throw an error if user tries to use --batch or --maxBatch when the engine - // has explicit batch dim. - const bool maxBatchWasSet{build.maxBatch != maxBatchNotProvided}; - const bool batchWasSet{inference.batch != batchNotProvided}; - if (detectedExplicitBatch && (maxBatchWasSet || batchWasSet)) { - throw std::invalid_argument( - "The --batch and --maxBatch flags should not be used when the input " - "model is ONNX or when dynamic shapes " - "are provided. Please use --optShapes and --shapes to set input shapes " - "instead."); - } - - // If batch and/or maxBatch is not set and the engine has implicit batch dim, - // set them to default values. - if (!detectedExplicitBatch) { - // If batch is not set, set it to default value. - if (!batchWasSet) { - inference.batch = defaultBatch; - } - // If maxBatch is not set, set it to be equal to batch. - if (!maxBatchWasSet) { - build.maxBatch = inference.batch; - } - // MaxBatch should not be less than batch. - if (build.maxBatch < inference.batch) { - throw std::invalid_argument( - "Build max batch " + std::to_string(build.maxBatch) + - " is less than inference batch " + std::to_string(inference.batch)); - } - } - - if (build.shapes.empty() && !inference.shapes.empty()) { - // If --shapes are provided but --optShapes are not, assume that optShapes - // is the same as shapes. - for (auto& s : inference.shapes) { - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMIN, - s.first, s.second); - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kOPT, - s.first, s.second); - insertShapesBuild(build.shapes, nvinfer1::OptProfileSelector::kMAX, - s.first, s.second); - } - } else if (!build.shapes.empty() && inference.shapes.empty()) { - // If --optShapes are provided but --shapes are not, assume that shapes is - // the same as optShapes. - for (auto& s : build.shapes) { - insertShapesInference( - inference.shapes, s.first, - s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); - } - } - - reporting.parse(arguments); - helps = parseHelp(arguments); - - if (!helps) { - if (!build.load && model.baseModel.format == ModelFormat::kANY) { - throw std::invalid_argument("Model missing or format not recognized"); - } - if (build.safe && system.DLACore >= 0) { - auto checkSafeDLAFormats = [](std::vector const& fmt) { - return fmt.empty() - ? false - : std::all_of(fmt.begin(), fmt.end(), - [](IOFormat const& pair) { - bool supported{false}; - bool const isLINEAR{ - pair.second == - 1U << static_cast( - nvinfer1::TensorFormat::kLINEAR)}; - bool const isCHW4{ - pair.second == - 1U << static_cast( - nvinfer1::TensorFormat::kCHW4)}; - bool const isCHW32{ - pair.second == - 1U << static_cast( - nvinfer1::TensorFormat::kCHW32)}; - bool const isCHW16{ - pair.second == - 1U << static_cast( - nvinfer1::TensorFormat::kCHW16)}; - supported |= pair.first == - nvinfer1::DataType::kINT8 && - (isLINEAR || isCHW4 || isCHW32); - supported |= pair.first == - nvinfer1::DataType::kHALF && - (isLINEAR || isCHW4 || isCHW16); - return supported; - }); - }; - if (!checkSafeDLAFormats(build.inputFormats) || - !checkSafeDLAFormats(build.outputFormats)) { - throw std::invalid_argument( - "I/O formats for safe DLA capability are restricted to " - "fp16/int8:linear, fp16:chw16 or int8:chw32"); - } - if (system.fallback) { - throw std::invalid_argument( - "GPU fallback (--allowGPUFallback) not allowed for safe DLA " - "capability"); - } - } - } -} - -void SafeBuilderOptions::parse(Arguments& arguments) { - auto getFormats = [&arguments](std::vector& formatsVector, - const char* argument) { - std::string list; - getAndDelOption(arguments, argument, list); - std::vector formats{splitToStringVec(list, ',')}; - for (const auto& f : formats) { - formatsVector.push_back(stringToValue(f)); - } - }; - - getAndDelOption(arguments, "--serialized", serialized); - getAndDelOption(arguments, "--onnx", onnxModelFile); - getAndDelOption(arguments, "--help", help); - getAndDelOption(arguments, "-h", help); - getAndDelOption(arguments, "--verbose", verbose); - getAndDelOption(arguments, "-v", verbose); - getFormats(inputFormats, "--inputIOFormats"); - getFormats(outputFormats, "--outputIOFormats"); - getAndDelOption(arguments, "--int8", int8); - getAndDelOption(arguments, "--calib", calibFile); - getAndDelOption(arguments, "--consistency", consistency); - getAndDelOption(arguments, "--std", standard); - std::string pluginName; - while (getAndDelOption(arguments, "--plugins", pluginName)) { - plugins.emplace_back(pluginName); - } -} - -std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) { - os << "=== Model Options ===" << std::endl; - - os << "Format: "; - switch (options.format) { - case ModelFormat::kCAFFE: { - os << "Caffe"; - break; - } - case ModelFormat::kONNX: { - os << "ONNX"; - break; - } - case ModelFormat::kUFF: { - os << "UFF"; - break; - } - case ModelFormat::kANY: - os << "*"; - break; - } - os << std::endl << "Model: " << options.model << std::endl; - - return os; -} - -std::ostream& operator<<(std::ostream& os, const UffInput& input) { - os << "Uff Inputs Layout: " << (input.NHWC ? "NHWC" : "NCHW") << std::endl; - for (const auto& i : input.inputs) { - os << "Input: " << i.first << "," << i.second.d[0] << "," << i.second.d[1] - << "," << i.second.d[2] << std::endl; - } - - return os; -} - -std::ostream& operator<<(std::ostream& os, const ModelOptions& options) { - os << options.baseModel; - switch (options.baseModel.format) { - case ModelFormat::kCAFFE: { - os << "Prototxt: " << options.prototxt << std::endl; - break; - } - case ModelFormat::kUFF: { - os << options.uffInputs; - break; - } - case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or - // the generic case - case ModelFormat::kANY: - break; - } - - os << "Output:"; - for (const auto& o : options.outputs) { - os << " " << o; - } - os << std::endl; - - return os; -} - -std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) { - switch (dtype) { - case nvinfer1::DataType::kFLOAT: { - os << "fp32"; - break; - } - case nvinfer1::DataType::kHALF: { - os << "fp16"; - break; - } - case nvinfer1::DataType::kINT8: { - os << "int8"; - break; - } - case nvinfer1::DataType::kINT32: { - os << "int32"; - break; - } - case nvinfer1::DataType::kBOOL: { - os << "bool"; - break; - } - } - return os; -} - -std::ostream& operator<<(std::ostream& os, IOFormat const& format) { - os << format.first << ":"; - - for (int32_t f = 0; f < nvinfer1::EnumMax(); ++f) { - if ((1U << f) & format.second) { - if (f) { - os << "+"; - } - switch (nvinfer1::TensorFormat(f)) { - case nvinfer1::TensorFormat::kLINEAR: { - os << "chw"; - break; - } - case nvinfer1::TensorFormat::kCHW2: { - os << "chw2"; - break; - } - case nvinfer1::TensorFormat::kHWC8: { - os << "hwc8"; - break; - } - case nvinfer1::TensorFormat::kHWC16: { - os << "hwc16"; - break; - } - case nvinfer1::TensorFormat::kCHW4: { - os << "chw4"; - break; - } - case nvinfer1::TensorFormat::kCHW16: { - os << "chw16"; - break; - } - case nvinfer1::TensorFormat::kCHW32: { - os << "chw32"; - break; - } - case nvinfer1::TensorFormat::kDHWC8: { - os << "dhwc8"; - break; - } - case nvinfer1::TensorFormat::kCDHW32: { - os << "cdhw32"; - break; - } - case nvinfer1::TensorFormat::kHWC: { - os << "hwc"; - break; - } - case nvinfer1::TensorFormat::kDLA_LINEAR: { - os << "dla_linear"; - break; - } - case nvinfer1::TensorFormat::kDLA_HWC4: { - os << "dla_hwc4"; - break; - } - } - } - } - return os; -} - -std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) { - int32_t i = 0; - for (const auto& d : dims) { - if (!d.size()) { - break; - } - os << (i ? "+" : "") << d; - ++i; - } - return os; -} - -std::ostream& operator<<(std::ostream& os, - LayerPrecisions const& layerPrecisions) { - int32_t i = 0; - for (auto const& layerPrecision : layerPrecisions) { - os << (i ? "," : "") << layerPrecision.first << ":" - << layerPrecision.second; - ++i; - } - return os; -} - -std::ostream& operator<<(std::ostream& os, const BuildOptions& options) { - // clang-format off - os << "=== Build Options ===" << std::endl << - - "Max batch: "; printBatch(os, options.maxBatch) << std::endl << - "Memory Pools: "; printMemoryPools(os, options) << std::endl << - "minTiming: " << options.minTiming << std::endl << - "avgTiming: " << options.avgTiming << std::endl << - "Precision: "; printPrecision(os, options) << std::endl << - "LayerPrecisions: " << options.layerPrecisions << std::endl << - "Calibration: " << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl << - "Refit: " << boolToEnabled(options.refittable) << std::endl << - "Sparsity: "; printSparsity(os, options) << std::endl << - "Safe mode: " << boolToEnabled(options.safe) << std::endl << - "DirectIO mode: " << boolToEnabled(options.directIO) << std::endl << - "Restricted mode: " << boolToEnabled(options.restricted) << std::endl << - "Save engine: " << (options.save ? options.engine : "") << std::endl << - "Load engine: " << (options.load ? options.engine : "") << std::endl << - "Profiling verbosity: " << static_cast(options.profilingVerbosity) << std::endl << - "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl << - "timingCacheMode: "; printTimingCache(os, options) << std::endl << - "timingCacheFile: " << options.timingCacheFile << std::endl; - // clang-format on - - auto printIOFormats = [](std::ostream& os, const char* direction, - const std::vector formats) { - if (formats.empty()) { - os << direction << "s format: fp32:CHW" << std::endl; - } else { - for (const auto& f : formats) { - os << direction << ": " << f << std::endl; - } - } - }; - - printIOFormats(os, "Input(s)", options.inputFormats); - printIOFormats(os, "Output(s)", options.outputFormats); - printShapes(os, "build", options.shapes); - printShapes(os, "calibration", options.shapesCalib); - - return os; -} - -std::ostream& operator<<(std::ostream& os, const SystemOptions& options) { - // clang-format off - os << "=== System Options ===" << std::endl << - - "Device: " << options.device << std::endl << - "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << - (options.DLACore != -1 && options.fallback ? "(With GPU fallback)" : "") << std::endl; - os << "Plugins:"; - - for (const auto& p : options.plugins) - { - os << " " << p; - } - os << std::endl; - - return os; - // clang-format on -} - -std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) { - // clang-format off - os << "=== Inference Options ===" << std::endl << - - "Batch: "; - if (options.batch && options.shapes.empty()) - { - os << options.batch << std::endl; - } - else - { - os << "Explicit" << std::endl; - } - printShapes(os, "inference", options.shapes); - os << "Iterations: " << options.iterations << std::endl << - "Duration: " << options.duration << "s (+ " - << options.warmup << "ms warm up)" << std::endl << - "Sleep time: " << options.sleep << "ms" << std::endl << - "Idle time: " << options.idle << "ms" << std::endl << - "Streams: " << options.streams << std::endl << - "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << - "Data transfers: " << boolToEnabled(!options.skipTransfers) << std::endl << - "Spin-wait: " << boolToEnabled(options.spin) << std::endl << - "Multithreading: " << boolToEnabled(options.threads) << std::endl << - "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << - "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << - "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << - "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << - "Skip inference: " << boolToEnabled(options.skip) << std::endl; - - // clang-format on - os << "Inputs:" << std::endl; - for (const auto& input : options.inputs) { - os << input.first << "<-" << input.second << std::endl; - } - - return os; -} - -std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) { - // clang-format off - os << "=== Reporting Options ===" << std::endl << - - "Verbose: " << boolToEnabled(options.verbose) << std::endl << - "Averages: " << options.avgs << " inferences" << std::endl << - "Percentile: " << options.percentile << std::endl << - "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << - "Dump output: " << boolToEnabled(options.output) << std::endl << - "Profile: " << boolToEnabled(options.profile) << std::endl << - "Export timing to JSON file: " << options.exportTimes << std::endl << - "Export output to JSON file: " << options.exportOutput << std::endl << - "Export profile to JSON file: " << options.exportProfile << std::endl; - // clang-format on - - return os; -} - -std::ostream& operator<<(std::ostream& os, const AllOptions& options) { - os << options.model << options.build << options.system << options.inference - << options.reporting << std::endl; - return os; -} - -std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) { - auto printIOFormats = [](std::ostream& os, const char* direction, - const std::vector formats) { - if (formats.empty()) { - os << direction << "s format: fp32:CHW" << std::endl; - } else { - for (const auto& f : formats) { - os << direction << ": " << f << std::endl; - } - } - }; - - os << "=== Build Options ===" << std::endl; - os << "Model ONNX: " << options.onnxModelFile << std::endl; - - os << "Precision: FP16"; - if (options.int8) { - os << " + INT8"; - } - os << std::endl; - os << "Calibration file: " << options.calibFile << std::endl; - os << "Serialized Network: " << options.serialized << std::endl; - - printIOFormats(os, "Input(s)", options.inputFormats); - printIOFormats(os, "Output(s)", options.outputFormats); - - os << "Plugins:"; - for (const auto& p : options.plugins) { - os << " " << p; - } - os << std::endl; - return os; -} - -void BaseModelOptions::help(std::ostream& os) { - // clang-format off - os << " --uff= UFF model" << std::endl << - " --onnx= ONNX model" << std::endl << - " --model= Caffe model (default = no model, random weights used)" << std::endl; - // clang-format on -} - -void UffInput::help(std::ostream& os) { - // clang-format off - os << " --uffInput=,X,Y,Z Input blob name and its dimensions (X,Y,Z=C,H,W), it can be specified " - "multiple times; at least one is required for UFF models" << std::endl << - " --uffNHWC Set if inputs are in the NHWC layout instead of NCHW (use " << - "X,Y,Z=H,W,C order in --uffInput)" << std::endl; - // clang-format on -} - -void ModelOptions::help(std::ostream& os) { - // clang-format off - os << "=== Model Options ===" << std::endl; - BaseModelOptions::help(os); - os << " --deploy= Caffe prototxt file" << std::endl << - " --output=[,]* Output names (it can be specified multiple times); at least one output " - "is required for UFF and Caffe" << std::endl; - UffInput::help(os); - // clang-format on -} - -void BuildOptions::help(std::ostream& os) { - // clang-format off - os << "=== Build Options ===" "\n" - " --maxBatch Set max batch size and build an implicit batch engine (default = same size as --batch)" "\n" - " This option should not be used when the input model is ONNX or when dynamic shapes are provided." "\n" - " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" - " --optShapes=spec Build with dynamic shapes using a profile with the opt shapes provided" "\n" - " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" - " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" - " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" - " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" - " Note: All three of min, opt and max shapes must be supplied." "\n" - " However, if only opt shapes is supplied then it will be expanded so" "\n" - " that min shapes and max shapes are set to the same values as opt shapes." "\n" - " Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." "\n" - " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" - " Each input shape is supplied as a key-value pair where key is the input name and" "\n" - " value is the dimensions (including the batch dimension) to be used for that input." "\n" - " Each key-value pair has the key and value separated using a colon (:)." "\n" - " Multiple input shapes can be provided via comma-separated key-value pairs." "\n" - " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" - " See --outputIOFormats help for the grammar of type and format list." "\n" - " Note: If this option is specified, please set comma-separated types and formats for all" "\n" - " inputs following the same order as network inputs ID (even if only one input" "\n" - " needs specifying IO format) or set the type and format once for broadcasting." "\n" - " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" - " Note: If this option is specified, please set comma-separated types and formats for all" "\n" - " outputs following the same order as network outputs ID (even if only one output" "\n" - " needs specifying IO format) or set the type and format once for broadcasting." "\n" - " IO Formats: spec ::= IOfmt[\",\"spec]" "\n" - " IOfmt ::= type:fmt" "\n" - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" - " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" "\n" - " --workspace=N Set workspace size in MiB." "\n" - " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s) in MiB." "\n" - " Note: Also accepts decimal sizes, e.g. 0.25MiB. Will be rounded down to the nearest integer bytes." "\n" - " Pool constraint: poolspec ::= poolfmt[\",\"poolspec]" "\n" - " poolfmt ::= pool:sizeInMiB" "\n" - " pool ::= \"workspace\"|\"dlaSRAM\"|\"dlaLocalDRAM\"|\"dlaGlobalDRAM\"" "\n" - " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)" "\n" - " --minTiming=M Set the minimum number of iterations used in kernel selection (default = " - << defaultMinTiming << ")" "\n" - " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " - << defaultAvgTiming << ")" "\n" - " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" - " and weights within the engine." "\n" - " --sparsity=spec Control sparsity (default = disabled). " "\n" - " Sparsity: spec ::= \"disable\", \"enable\", \"force\"" "\n" - " Note: Description about each of these options is as below" "\n" - " disable = do not enable sparse tactics in the builder (this is the default)" "\n" - " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" - " considered if the weights have the right sparsity pattern)" "\n" - " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" - " a sparsity pattern (even if you loaded a model yourself)" "\n" - " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" - " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" - " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" - " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" - " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" - " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" - " Precision Constaints: spec ::= \"none\" | \"obey\" | \"prefer\"" "\n" - " none = no constraints" "\n" - " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" - " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" - " otherwise" "\n" - " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" - " \"obey\" or \"prefer\". (default = none)" "\n" - " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" - " layerName to specify the default precision for all the unspecified layers." "\n" - " Per-layer precision spec ::= layerPrecision[\",\"spec]" "\n" - " layerPrecision ::= layerName\":\"precision" "\n" - " precision ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" "\n" - " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" - " \"obey\" or \"prefer\". (default = none)" "\n" - " The specs are read left-to-right, and later ones override earlier ones. \"*\" can be used as a" "\n" - " layerName to specify the default precision for all the unspecified layers. If a layer has more than""\n" - " one output, then multiple types separated by \"+\" can be provided for this layer." "\n" - " Per-layer output type spec ::= layerOutputTypes[\",\"spec]" "\n" - " layerOutputTypes ::= layerName\":\"type" "\n" - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"[\"+\"type]" "\n" - " --calib= Read INT8 calibration cache file" "\n" - " --safe Enable build safety certified engine" "\n" - " --consistency Perform consistency checking on safety certified engine" "\n" - " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" - " --saveEngine= Save the serialized engine" "\n" - " --loadEngine= Load a serialized engine" "\n" - " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" - " tactic sources (default = all available tactics)." "\n" - " Note: Currently only cuDNN, cuBLAS and cuBLAS-LT are listed as optional tactics." "\n" - " Tactic Sources: tactics ::= [\",\"tactic]" "\n" - " tactic ::= (+|-)lib" "\n" - " lib ::= \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"" "\n" - " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" - " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" - " --timingCacheFile= Save/load the serialized global timing cache" "\n" - ; - // clang-format on - os << std::flush; -} - -void SystemOptions::help(std::ostream& os) { - // clang-format off - os << "=== System Options ===" << std::endl << - " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << - " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << - " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers " - "(default = disabled)" << std::endl; - os << " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl; - // clang-format on -} - -void InferenceOptions::help(std::ostream& os) { - // clang-format off - os << "=== Inference Options ===" << std::endl << - " --batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << - " This option should not be used when the engine is built from an ONNX model or when dynamic" << std::endl << - " shapes are provided when the engine is built." << std::endl << - " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl << - " Note: Input names can be wrapped with escaped single quotes (ex: \\\'Input:0\\\')." << std::endl << - " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl << - " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl << - " value is the dimensions (including the batch dimension) to be used for that input." << std::endl << - " Each key-value pair has the key and value separated using a colon (:)." << std::endl << - " Multiple input shapes can be provided via comma-separated key-value pairs." << std::endl << - " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be " - "wrapped with single quotes (ex: 'Input:0')" << std::endl << - " Input values spec ::= Ival[\",\"spec]" << std::endl << - " Ival ::= name\":\"file" << std::endl << - " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl << - " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = " - << defaultWarmUp << ")" << std::endl << - " --duration=N Run performance measurements for at least N seconds wallclock time (default = " - << defaultDuration << ")" << std::endl << - " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute " - "(default = " << defaultSleep << ")" << std::endl << - " --idleTime=N Sleep N milliseconds between two continuous iterations" - "(default = " << defaultIdle << ")" << std::endl << - " --streams=N Instantiate N engines to use concurrently (default = " << defaultStreams << ")" << std::endl << - " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl << - " --noDataTransfers Disable DMA transfers to and from device (default = enabled)." << std::endl << - " --useManagedMemory Use managed memory instead of seperate host and device allocations (default = disabled)." << std::endl << - " --useSpinWait Actively synchronize on GPU events. This option may decrease synchronization time but " - "increase CPU usage and power (default = disabled)" << std::endl << - " --threads Enable multithreading to drive engines with independent threads" - " or speed up refitting (default = disabled) " << std::endl << - " --useCudaGraph Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl << - " This flag may be ignored if the graph capture fails." << std::endl << - " --timeDeserialize Time the amount of time it takes to deserialize the network and exit." << std::endl << - " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl << - " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second " - "profile run will be executed (default = disabled)" << std::endl << - " --buildOnly Skip inference perf measurement (default = disabled)" << std::endl; - // clang-format on -} - -void ReportingOptions::help(std::ostream& os) { - // clang-format off - os << "=== Reporting Options ===" << std::endl << - " --verbose Use verbose logging (default = false)" << std::endl << - " --avgRuns=N Report performance measurements averaged over N consecutive " - "iterations (default = " << defaultAvgRuns << ")" << std::endl << - " --percentile=P Report performance for the P percentage (0<=P<=100, 0 " - "representing max perf, and 100 representing min perf; (default" - " = " << defaultPercentile << "%)" << std::endl << - " --dumpRefit Print the refittable layers and weights from a refittable " - "engine" << std::endl << - " --dumpOutput Print the output tensor(s) of the last inference iteration " - "(default = disabled)" << std::endl << - " --dumpProfile Print profile information per layer (default = disabled)" << std::endl << - " --dumpLayerInfo Print layer information of the engine to console " - "(default = disabled)" << std::endl << - " --exportTimes= Write the timing results in a json file (default = disabled)" << std::endl << - " --exportOutput= Write the output tensors to a json file (default = disabled)" << std::endl << - " --exportProfile= Write the profile information per layer in a json file " - "(default = disabled)" << std::endl << - " --exportLayerInfo= Write the layer information of the engine in a json file " - "(default = disabled)" << std::endl; - // clang-format on -} - -void helpHelp(std::ostream& os) { - // clang-format off - os << "=== Help ===" << std::endl << - " --help, -h Print this message" << std::endl; - // clang-format on -} - -void AllOptions::help(std::ostream& os) { - ModelOptions::help(os); - os << std::endl; - BuildOptions::help(os); - os << std::endl; - InferenceOptions::help(os); - os << std::endl; - // clang-format off - os << "=== Build and Inference Batch Options ===" << std::endl << - " When using implicit batch, the max batch size of the engine, if not given, " << std::endl << - " is set to the inference batch size;" << std::endl << - " when using explicit batch, if shapes are specified only for inference, they " << std::endl << - " will be used also as min/opt/max in the build profile; if shapes are " << std::endl << - " specified only for the build, the opt shapes will be used also for inference;" << std::endl << - " if both are specified, they must be compatible; and if explicit batch is " << std::endl << - " enabled but neither is specified, the model must provide complete static" << std::endl << - " dimensions, including batch size, for all inputs" << std::endl << - " Using ONNX models automatically forces explicit batch." << std::endl << - std::endl; - // clang-format on - ReportingOptions::help(os); - os << std::endl; - SystemOptions::help(os); - os << std::endl; - helpHelp(os); -} - -void SafeBuilderOptions::printHelp(std::ostream& os) { - // clang-format off - os << "=== Mandatory ===" << std::endl << - " --onnx= ONNX model" << std::endl << - " " << std::endl << - "=== Optional ===" << std::endl << - " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl << - " See --outputIOFormats help for the grammar of type and format list." << std::endl << - " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << - " inputs following the same order as network inputs ID (even if only one input" << std::endl << - " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << - " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl << - " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << - " outputs following the same order as network outputs ID (even if only one output" << std::endl << - " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << - " IO Formats: spec ::= IOfmt[\",\"spec]" << std::endl << - " IOfmt ::= type:fmt" << std::endl << - " type ::= \"fp32\"|\"fp16\"|\"int32\"|\"int8\"" << std::endl << - " fmt ::= (\"chw\"|\"chw2\"|\"chw4\"|\"hwc8\"|\"chw16\"|\"chw32\"|\"dhwc8\")[\"+\"fmt]" << std::endl << - " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl << - " --consistency Enable consistency check for serialized engine, (default = disabled)" << std::endl << - " --std Build standard serialized engine, (default = disabled)" << std::endl << - " --calib= Read INT8 calibration cache file" << std::endl << - " --serialized= Save the serialized network" << std::endl << - " --plugins Plugin library (.so) to load (can be specified multiple times)" << std::endl << - " --verbose or -v Use verbose logging (default = false)" << std::endl << - " --help or -h Print this message" << std::endl << - " " << std::endl; - // clang-format on -} - -} // namespace sample diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h deleted file mode 100644 index 99293da10..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleOptions.h +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_OPTIONS_H -#define TRT_SAMPLE_OPTIONS_H - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" - -namespace sample { - -// Build default params -constexpr int32_t maxBatchNotProvided{0}; -constexpr int32_t defaultMinTiming{1}; -constexpr int32_t defaultAvgTiming{8}; - -// System default params -constexpr int32_t defaultDevice{0}; - -// Inference default params -constexpr int32_t defaultBatch{1}; -constexpr int32_t batchNotProvided{0}; -constexpr int32_t defaultStreams{1}; -constexpr int32_t defaultIterations{10}; -constexpr float defaultWarmUp{200.F}; -constexpr float defaultDuration{3.F}; -constexpr float defaultSleep{}; -constexpr float defaultIdle{}; - -// Reporting default params -constexpr int32_t defaultAvgRuns{10}; -constexpr float defaultPercentile{99}; - -enum class PrecisionConstraints { kNONE, kOBEY, kPREFER }; - -enum class ModelFormat { kANY, kCAFFE, kONNX, kUFF }; - -enum class SparsityFlag { kDISABLE, kENABLE, kFORCE }; - -enum class TimingCacheMode { kDISABLE, kLOCAL, kGLOBAL }; - -using Arguments = std::unordered_multimap; - -using IOFormat = std::pair; - -using ShapeRange = - std::array, - nvinfer1::EnumMax()>; - -using LayerPrecisions = std::unordered_map; -using LayerOutputTypes = - std::unordered_map>; - -struct Options { - virtual void parse(Arguments& arguments) = 0; -}; - -struct BaseModelOptions : public Options { - ModelFormat format{ModelFormat::kANY}; - std::string model; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct UffInput : public Options { - std::vector> inputs; - bool NHWC{false}; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct ModelOptions : public Options { - BaseModelOptions baseModel; - std::string prototxt; - std::vector outputs; - UffInput uffInputs; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct BuildOptions : public Options { - int32_t maxBatch{maxBatchNotProvided}; - double workspace{-1.0}; - double dlaSRAM{-1.0}; - double dlaLocalDRAM{-1.0}; - double dlaGlobalDRAM{-1.0}; - int32_t minTiming{defaultMinTiming}; - int32_t avgTiming{defaultAvgTiming}; - bool tf32{true}; - bool fp16{false}; - bool int8{false}; - bool directIO{false}; - PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE}; - LayerPrecisions layerPrecisions; - LayerOutputTypes layerOutputTypes; - bool safe{false}; - bool consistency{false}; - bool restricted{false}; - bool save{false}; - bool load{false}; - bool refittable{false}; - SparsityFlag sparsity{SparsityFlag::kDISABLE}; - nvinfer1::ProfilingVerbosity profilingVerbosity{ - nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; - std::string engine; - std::string calibration; - std::unordered_map shapes; - std::unordered_map shapesCalib; - std::vector inputFormats; - std::vector outputFormats; - nvinfer1::TacticSources enabledTactics{0}; - nvinfer1::TacticSources disabledTactics{0}; - TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL}; - std::string timingCacheFile{}; - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct SystemOptions : public Options { - int32_t device{defaultDevice}; - int32_t DLACore{-1}; - bool fallback{false}; - std::vector plugins; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct InferenceOptions : public Options { - int32_t batch{batchNotProvided}; - int32_t iterations{defaultIterations}; - int32_t streams{defaultStreams}; - float warmup{defaultWarmUp}; - float duration{defaultDuration}; - float sleep{defaultSleep}; - float idle{defaultIdle}; - bool overlap{true}; - bool skipTransfers{false}; - bool useManaged{false}; - bool spin{false}; - bool threads{false}; - bool graph{false}; - bool skip{false}; - bool rerun{false}; - bool timeDeserialize{false}; - bool timeRefit{false}; - std::unordered_map inputs; - std::unordered_map> shapes; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct ReportingOptions : public Options { - bool verbose{false}; - int32_t avgs{defaultAvgRuns}; - float percentile{defaultPercentile}; - bool refit{false}; - bool output{false}; - bool profile{false}; - bool layerInfo{false}; - std::string exportTimes; - std::string exportOutput; - std::string exportProfile; - std::string exportLayerInfo; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -struct SafeBuilderOptions : public Options { - std::string serialized{}; - std::string onnxModelFile{}; - bool help{false}; - bool verbose{false}; - std::vector inputFormats; - std::vector outputFormats; - bool int8{false}; - std::string calibFile{}; - std::vector plugins; - bool consistency{false}; - bool standard{false}; - - void parse(Arguments& arguments) override; - - static void printHelp(std::ostream& out); -}; - -struct AllOptions : public Options { - ModelOptions model; - BuildOptions build; - SystemOptions system; - InferenceOptions inference; - ReportingOptions reporting; - bool helps{false}; - - void parse(Arguments& arguments) override; - - static void help(std::ostream& out); -}; - -Arguments argsToArgumentsMap(int32_t argc, char* argv[]); - -bool parseHelp(Arguments& arguments); - -void helpHelp(std::ostream& out); - -// Functions to print options - -std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options); - -std::ostream& operator<<(std::ostream& os, const UffInput& input); - -std::ostream& operator<<(std::ostream& os, const IOFormat& format); - -std::ostream& operator<<(std::ostream& os, const ShapeRange& dims); - -std::ostream& operator<<(std::ostream& os, const ModelOptions& options); - -std::ostream& operator<<(std::ostream& os, const BuildOptions& options); - -std::ostream& operator<<(std::ostream& os, const SystemOptions& options); - -std::ostream& operator<<(std::ostream& os, const InferenceOptions& options); - -std::ostream& operator<<(std::ostream& os, const ReportingOptions& options); - -std::ostream& operator<<(std::ostream& os, const AllOptions& options); - -std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options); - -inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) { - for (int32_t i = 0; i < dims.nbDims; ++i) { - os << (i ? "x" : "") << dims.d[i]; - } - return os; -} -inline std::ostream& operator<<(std::ostream& os, - const nvinfer1::WeightsRole role) { - switch (role) { - case nvinfer1::WeightsRole::kKERNEL: { - os << "Kernel"; - break; - } - case nvinfer1::WeightsRole::kBIAS: { - os << "Bias"; - break; - } - case nvinfer1::WeightsRole::kSHIFT: { - os << "Shift"; - break; - } - case nvinfer1::WeightsRole::kSCALE: { - os << "Scale"; - break; - } - case nvinfer1::WeightsRole::kCONSTANT: { - os << "Constant"; - break; - } - case nvinfer1::WeightsRole::kANY: { - os << "Any"; - break; - } - } - - return os; -} - -inline std::ostream& operator<<(std::ostream& os, - const std::vector& vec) { - for (int32_t i = 0, e = static_cast(vec.size()); i < e; ++i) { - os << (i ? "x" : "") << vec[i]; - } - return os; -} - -} // namespace sample - -#endif // TRT_SAMPLES_OPTIONS_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp b/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp deleted file mode 100644 index 5e8e8619b..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.cpp +++ /dev/null @@ -1,480 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "sampleInference.h" -#include "sampleOptions.h" -#include "sampleReporting.h" - -using namespace nvinfer1; - -namespace sample { - -namespace { - -//! -//! \brief Find percentile in an ascending sequence of timings -//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown. -//! -template -float findPercentile(float percentile, - std::vector const& timings, - T const& toFloat) { - int32_t const all = static_cast(timings.size()); - int32_t const exclude = static_cast((1 - percentile / 100) * all); - if (timings.empty()) { - return std::numeric_limits::infinity(); - } - if (percentile < 0.0f || percentile > 100.0f) { - throw std::runtime_error("percentile is not in [0, 100]!"); - } - return toFloat(timings[std::max(all - 1 - exclude, 0)]); -} - -//! -//! \brief Find median in a sorted sequence of timings -//! -template -float findMedian(std::vector const& timings, T const& toFloat) { - if (timings.empty()) { - return std::numeric_limits::infinity(); - } - - int32_t const m = timings.size() / 2; - if (timings.size() % 2) { - return toFloat(timings[m]); - } - - return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2; -} - -//! -//! \brief Find coefficient of variance (which is std / mean) in a sorted -//! sequence of timings given the mean -//! -template -float findCoeffOfVariance(std::vector const& timings, - T const& toFloat, float mean) { - if (timings.empty()) { - return 0; - } - - if (mean == 0.F) { - return std::numeric_limits::infinity(); - } - - auto const metricAccumulator = [toFloat, mean](float acc, - InferenceTime const& a) { - float const diff = toFloat(a) - mean; - return acc + diff * diff; - }; - float const variance = - std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / - timings.size(); - - return std::sqrt(variance) / mean * 100.F; -} - -inline InferenceTime traceToTiming(const InferenceTrace& a) { - return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), - (a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart), - (a.d2hEnd - a.h2dStart)); -} - -} // namespace - -void printProlog(int32_t warmups, int32_t timings, float warmupMs, - float benchTimeMs, std::ostream& os) { - os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" - << std::endl; - os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 - << " s" << std::endl; -} - -void printTiming(std::vector const& timings, int32_t runsPerAvg, - std::ostream& os) { - int32_t count = 0; - InferenceTime sum; - - os << std::endl; - os << "=== Trace details ===" << std::endl; - os << "Trace averages of " << runsPerAvg << " runs:" << std::endl; - for (auto const& t : timings) { - sum += t; - - if (++count == runsPerAvg) { - // clang-format off - os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg - << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (end to end " << sum.e2e / runsPerAvg - << " ms, enqueue " << sum.enq / runsPerAvg << " ms)" << std::endl; - // clang-format on - count = 0; - sum.enq = 0; - sum.h2d = 0; - sum.compute = 0; - sum.d2h = 0; - sum.e2e = 0; - } - } -} - -void printMetricExplanations(std::ostream& os) { - os << std::endl; - os << "=== Explanations of the performance metrics ===" << std::endl; - os << "Total Host Walltime: the host walltime from when the first query " - "(after warmups) is enqueued to when the " - "last query is completed." - << std::endl; - os << "GPU Compute Time: the GPU latency to execute the kernels for a query." - << std::endl; - os << "Total GPU Compute Time: the summation of the GPU Compute Time of all " - "the queries. If this is significantly " - "shorter than Total Host Walltime, the GPU may be under-utilized " - "because of host-side overheads or data " - "transfers." - << std::endl; - os << "Throughput: the observed throughput computed by dividing the number " - "of queries by the Total Host Walltime. " - "If this is significantly lower than the reciprocal of GPU Compute " - "Time, the GPU may be under-utilized " - "because of host-side overheads or data transfers." - << std::endl; - os << "Enqueue Time: the host latency to enqueue a query. If this is longer " - "than GPU Compute Time, the GPU may be " - "under-utilized." - << std::endl; - os << "H2D Latency: the latency for host-to-device data transfers for input " - "tensors of a single query." - << std::endl; - os << "D2H Latency: the latency for device-to-host data transfers for output " - "tensors of a single query." - << std::endl; - os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H " - "Latency. This is the latency to infer a " - "single query." - << std::endl; - os << "End-to-End Host Latency: the duration from when the H2D of a query is " - "called to when the D2H of the same " - "query is completed, which includes the latency to wait for the " - "completion of the previous query. This is " - "the latency of a query if multiple queries are enqueued consecutively." - << std::endl; -} - -PerformanceResult -getPerformanceResult(std::vector const& timings, - std::function metricGetter, - float percentile) { - auto const metricComparator = [metricGetter](InferenceTime const& a, - InferenceTime const& b) { - return metricGetter(a) < metricGetter(b); - }; - auto const metricAccumulator = [metricGetter](float acc, - InferenceTime const& a) { - return acc + metricGetter(a); - }; - std::vector newTimings = timings; - std::sort(newTimings.begin(), newTimings.end(), metricComparator); - PerformanceResult result; - result.min = metricGetter(newTimings.front()); - result.max = metricGetter(newTimings.back()); - result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f, - metricAccumulator) / - newTimings.size(); - result.median = findMedian(newTimings, metricGetter); - result.percentile = findPercentile(percentile, newTimings, metricGetter); - result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean); - return result; -} - -void printEpilog(std::vector const& timings, float walltimeMs, - float percentile, int32_t batchSize, std::ostream& osInfo, - std::ostream& osWarning, std::ostream& osVerbose) { - float const throughput = batchSize * timings.size() / walltimeMs * 1000; - - auto const getLatency = [](InferenceTime const& t) { return t.latency(); }; - auto const latencyResult = - getPerformanceResult(timings, getLatency, percentile); - - auto const getEndToEnd = [](InferenceTime const& t) { return t.e2e; }; - auto const e2eLatencyResult = - getPerformanceResult(timings, getEndToEnd, percentile); - - auto const getEnqueue = [](InferenceTime const& t) { return t.enq; }; - auto const enqueueResult = - getPerformanceResult(timings, getEnqueue, percentile); - - auto const getH2d = [](InferenceTime const& t) { return t.h2d; }; - auto const h2dResult = getPerformanceResult(timings, getH2d, percentile); - - auto const getCompute = [](InferenceTime const& t) { return t.compute; }; - auto const gpuComputeResult = - getPerformanceResult(timings, getCompute, percentile); - - auto const getD2h = [](InferenceTime const& t) { return t.d2h; }; - auto const d2hResult = getPerformanceResult(timings, getD2h, percentile); - - auto const toPerfString = [percentile](const PerformanceResult& r) { - std::stringstream s; - s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean - << " ms, " - << "median = " << r.median << " ms, percentile(" << percentile - << "%) = " << r.percentile << " ms"; - return s.str(); - }; - - osInfo << std::endl; - osInfo << "=== Performance summary ===" << std::endl; - osInfo << "Throughput: " << throughput << " qps" << std::endl; - osInfo << "Latency: " << toPerfString(latencyResult) << std::endl; - osInfo << "End-to-End Host Latency: " << toPerfString(e2eLatencyResult) - << std::endl; - osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl; - osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl; - osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl; - osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl; - osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl; - osInfo << "Total GPU Compute Time: " - << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl; - - // Report warnings if the throughput is bound by other factors than GPU - // Compute Time. - constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F}; - if (enqueueResult.median > - kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) { - osWarning << "* Throughput may be bound by Enqueue Time rather than GPU " - "Compute and the GPU may be under-utilized." - << std::endl; - osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs " - "where possible) may increase the " - "throughput." - << std::endl; - } - if (h2dResult.median >= gpuComputeResult.median) { - osWarning << "* Throughput may be bound by host-to-device transfers for " - "the inputs rather than GPU Compute and " - "the GPU may be under-utilized." - << std::endl; - osWarning << " Add --noDataTransfers flag to disable data transfers." - << std::endl; - } - if (d2hResult.median >= gpuComputeResult.median) { - osWarning << "* Throughput may be bound by device-to-host transfers for " - "the outputs rather than GPU Compute " - "and the GPU may be under-utilized." - << std::endl; - osWarning << " Add --noDataTransfers flag to disable data transfers." - << std::endl; - } - - // Report warnings if the GPU Compute Time is unstable. - constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F}; - if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) { - osWarning - << "* GPU compute time is unstable, with coefficient of variance = " - << gpuComputeResult.coeffVar << "%." << std::endl; - osWarning << " If not already in use, locking GPU clock frequency or " - "adding --useSpinWait may improve the " - << "stability." << std::endl; - } - - // Explain what the metrics mean. - osInfo << "Explanations of the performance metrics are printed in the " - "verbose logs." - << std::endl; - printMetricExplanations(osVerbose); - - osInfo << std::endl; -} - -void printPerformanceReport(std::vector const& trace, - const ReportingOptions& reporting, float warmupMs, - int32_t batchSize, std::ostream& osInfo, - std::ostream& osWarning, std::ostream& osVerbose) { - auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { - return a.computeStart >= warmupMs; - }; - auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); - int32_t const warmups = noWarmup - trace.begin(); - float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; - // when implicit batch used, batchSize = options.inference.batch, which is - // parsed through --batch - // when explicit batch used, batchSize = options.inference.batch = 0 - // treat inference with explicit batch as a single query and report the - // throughput - batchSize = batchSize ? batchSize : 1; - printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, - warmupMs, benchTime, osInfo); - - std::vector timings(trace.size() - warmups); - std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); - printTiming(timings, reporting.avgs, osInfo); - printEpilog(timings, benchTime, reporting.percentile, batchSize, osInfo, - osWarning, osVerbose); - - if (!reporting.exportTimes.empty()) { - exportJSONTrace(trace, reporting.exportTimes); - } -} - -//! Printed format: -//! [ value, ...] -//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end -//! h2d" : time, "start compute" : time, -//! "end compute" : time, "start d2h" : time, "end d2h" : time, -//! "h2d" : time, "compute" : time, -//! "d2h" : time, "latency" : time, "end to end" : time } -//! -void exportJSONTrace(std::vector const& trace, - std::string const& fileName) { - std::ofstream os(fileName, std::ofstream::trunc); - os << "[" << std::endl; - char const* sep = " "; - for (auto const& t : trace) { - InferenceTime const it(traceToTiming(t)); - os << sep << "{ "; - sep = ", "; - // clang-format off - os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep - << "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep - << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep - << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep - << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep - << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << sep - << "\"endToEndMs\" : " << it.e2e << " }" << std::endl; - // clang-format on - } - os << "]" << std::endl; -} - -void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept { - if (mIterator == mLayers.end()) { - bool const first = !mLayers.empty() && mLayers.begin()->name == layerName; - mUpdatesCount += mLayers.empty() || first; - if (first) { - mIterator = mLayers.begin(); - } else { - mLayers.emplace_back(); - mLayers.back().name = layerName; - mIterator = mLayers.end() - 1; - } - } - - mIterator->timeMs += timeMs; - ++mIterator; -} - -void Profiler::print(std::ostream& os) const noexcept { - std::string const nameHdr("Layer"); - std::string const timeHdr(" Time (ms)"); - std::string const avgHdr(" Avg. Time (ms)"); - std::string const percentageHdr(" Time %"); - - float const totalTimeMs = getTotalTime(); - - auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) { - return a.name.size() < b.name.size(); - }; - auto const longestName = - std::max_element(mLayers.begin(), mLayers.end(), cmpLayer); - auto const nameLength = - std::max(longestName->name.size() + 1, nameHdr.size()); - auto const timeLength = timeHdr.size(); - auto const avgLength = avgHdr.size(); - auto const percentageLength = percentageHdr.size(); - - os << std::endl - << "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl - << std::setw(nameLength) << nameHdr << timeHdr << avgHdr << percentageHdr - << std::endl; - - for (auto const& p : mLayers) { - // clang-format off - os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << p.timeMs - << std::setw(avgLength) << std::fixed << std::setprecision(4) << p.timeMs / mUpdatesCount - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << p.timeMs / totalTimeMs * 100 - << std::endl; - } - { - os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2) - << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl; - // clang-format on - } - os << std::endl; -} - -void Profiler::exportJSONProfile(std::string const& fileName) const noexcept { - std::ofstream os(fileName, std::ofstream::trunc); - os << "[" << std::endl - << " { \"count\" : " << mUpdatesCount << " }" << std::endl; - - auto const totalTimeMs = getTotalTime(); - - for (auto const& l : mLayers) { - // clang-format off - os << ", {" << " \"name\" : \"" << l.name << "\"" - ", \"timeMs\" : " << l.timeMs - << ", \"averageMs\" : " << l.timeMs / mUpdatesCount - << ", \"percentage\" : " << l.timeMs / totalTimeMs * 100 - << " }" << std::endl; - // clang-format on - } - os << "]" << std::endl; -} - -void dumpInputs(nvinfer1::IExecutionContext const& context, - Bindings const& bindings, std::ostream& os) { - os << "Input Tensors:" << std::endl; - bindings.dumpInputs(context, os); -} - -void dumpOutputs(nvinfer1::IExecutionContext const& context, - Bindings const& bindings, std::ostream& os) { - os << "Output Tensors:" << std::endl; - bindings.dumpOutputs(context, os); -} - -void exportJSONOutput(nvinfer1::IExecutionContext const& context, - Bindings const& bindings, std::string const& fileName, - int32_t batch) { - std::ofstream os(fileName, std::ofstream::trunc); - std::string sep = " "; - auto const output = bindings.getOutputBindings(); - os << "[" << std::endl; - for (auto const& binding : output) { - // clang-format off - os << sep << "{ \"name\" : \"" << binding.first << "\"" << std::endl; - sep = ", "; - os << " " << sep << "\"dimensions\" : \""; - bindings.dumpBindingDimensions(binding.second, context, os); - os << "\"" << std::endl; - os << " " << sep << "\"values\" : [ "; - bindings.dumpBindingValues(context, binding.second, os, sep, batch); - os << " ]" << std::endl << " }" << std::endl; - // clang-format on - } - os << "]" << std::endl; -} - -} // namespace sample diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h deleted file mode 100644 index 68b78af9c..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleReporting.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_REPORTING_H -#define TRT_SAMPLE_REPORTING_H - -#include -#include - -#include "NvInfer.h" - -#include "sampleOptions.h" -#include "sampleUtils.h" - -namespace sample { - -//! -//! \struct InferenceTime -//! \brief Measurement times in milliseconds -//! -struct InferenceTime { - InferenceTime(float q, float i, float c, float o, float e) - : enq(q), h2d(i), compute(c), d2h(o), e2e(e) {} - - InferenceTime() = default; - InferenceTime(InferenceTime const&) = default; - InferenceTime(InferenceTime&&) = default; - InferenceTime& operator=(InferenceTime const&) = default; - InferenceTime& operator=(InferenceTime&&) = default; - ~InferenceTime() = default; - - float enq{0}; // Enqueue - float h2d{0}; // Host to Device - float compute{0}; // Compute - float d2h{0}; // Device to Host - float e2e{0}; // end to end - - // ideal latency - float latency() const { return h2d + compute + d2h; } -}; - -//! -//! \struct InferenceTrace -//! \brief Measurement points in milliseconds -//! -struct InferenceTrace { - InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, - float ce, float os, float oe) - : stream(s), enqStart(es), enqEnd(ee), h2dStart(is), h2dEnd(ie), - computeStart(cs), computeEnd(ce), d2hStart(os), d2hEnd(oe) {} - - InferenceTrace() = default; - InferenceTrace(InferenceTrace const&) = default; - InferenceTrace(InferenceTrace&&) = default; - InferenceTrace& operator=(InferenceTrace const&) = default; - InferenceTrace& operator=(InferenceTrace&&) = default; - ~InferenceTrace() = default; - - int32_t stream{0}; - float enqStart{0}; - float enqEnd{0}; - float h2dStart{0}; - float h2dEnd{0}; - float computeStart{0}; - float computeEnd{0}; - float d2hStart{0}; - float d2hEnd{0}; -}; - -inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) { - return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, - a.d2h + b.d2h, a.e2e + b.e2e); -} - -inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) { - return a = a + b; -} - -//! -//! \struct PerformanceResult -//! \brief Performance result of a performance metric -//! -struct PerformanceResult { - float min{0}; - float max{0}; - float mean{0}; - float median{0}; - float percentile{0}; - float coeffVar{0}; // coefficient of variation -}; - -//! -//! \brief Print benchmarking time and number of traces collected -//! -void printProlog(int32_t warmups, int32_t timings, float warmupMs, - float walltime, std::ostream& os); - -//! -//! \brief Print a timing trace -//! -void printTiming(std::vector const& timings, int32_t runsPerAvg, - std::ostream& os); - -//! -//! \brief Print the performance summary of a trace -//! -void printEpilog(std::vector const& timings, float percentile, - int32_t batchSize, std::ostream& osInfo, - std::ostream& osWarning, std::ostream& osVerbose); - -//! -//! \brief Get the result of a specific performance metric from a trace -//! -PerformanceResult -getPerformanceResult(std::vector const& timings, - std::function metricGetter, - float percentile); - -//! -//! \brief Print the explanations of the performance metrics printed in -//! printEpilog() function. -//! -void printMetricExplanations(std::ostream& os); - -//! -//! \brief Print and summarize a timing trace -//! -void printPerformanceReport(std::vector const& trace, - ReportingOptions const& reporting, float warmupMs, - int32_t batchSize, std::ostream& osInfo, - std::ostream& osWarning, std::ostream& osVerbose); - -//! -//! \brief Export a timing trace to JSON file -//! -void exportJSONTrace(std::vector const& trace, - std::string const& fileName); - -//! -//! \brief Print input tensors to stream -//! -void dumpInputs(nvinfer1::IExecutionContext const& context, - Bindings const& bindings, std::ostream& os); - -//! -//! \brief Print output tensors to stream -//! -void dumpOutputs(nvinfer1::IExecutionContext const& context, - Bindings const& bindings, std::ostream& os); - -//! -//! \brief Export output tensors to JSON file -//! -void exportJSONOutput(nvinfer1::IExecutionContext const& context, - Bindings const& bindings, std::string const& fileName, - int32_t batch); - -//! -//! \struct LayerProfile -//! \brief Layer profile information -//! -struct LayerProfile { - std::string name; - float timeMs{0}; -}; - -//! -//! \class Profiler -//! \brief Collect per-layer profile information, assuming times are reported in -//! the same order -//! -class Profiler : public nvinfer1::IProfiler { - public: - void reportLayerTime(char const* layerName, float timeMs) noexcept override; - - void print(std::ostream& os) const noexcept; - - //! - //! \brief Export a profile to JSON file - //! - void exportJSONProfile(std::string const& fileName) const noexcept; - - private: - float getTotalTime() const noexcept { - auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { - return accumulator + lp.timeMs; - }; - return std::accumulate(mLayers.begin(), mLayers.end(), 0.0, plusLayerTime); - } - - std::vector mLayers; - std::vector::iterator mIterator{mLayers.begin()}; - int32_t mUpdatesCount{0}; -}; - -} // namespace sample - -#endif // TRT_SAMPLE_REPORTING_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h b/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h deleted file mode 100644 index 2c6f415bc..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/sampleUtils.h +++ /dev/null @@ -1,494 +0,0 @@ -/* - * Copyright (c) 1993-2022, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef TRT_SAMPLE_UTILS_H -#define TRT_SAMPLE_UTILS_H - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "NvInfer.h" - -#include "common.h" -#include "logger.h" -#include "sampleDevice.h" -#include "sampleOptions.h" - -namespace sample { - -inline int dataTypeSize(nvinfer1::DataType dataType) { - switch (dataType) { - case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: - return 4; - case nvinfer1::DataType::kHALF: - return 2; - case nvinfer1::DataType::kBOOL: - case nvinfer1::DataType::kINT8: - return 1; - } - return 0; -} - -template inline T roundUp(T m, T n) { - return ((m + n - 1) / n) * n; -} - -inline int volume(const nvinfer1::Dims& d) { - return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); -} - -//! comps is the number of components in a vector. Ignored if vecDim < 0. -inline int64_t volume(const nvinfer1::Dims& dims, const nvinfer1::Dims& strides, - int vecDim, int comps, int batch) { - int maxNbElems = 1; - for (int i = 0; i < dims.nbDims; ++i) { - // Get effective length of axis. - int d = dims.d[i]; - // Any dimension is 0, it is an empty tensor. - if (d == 0) { - return 0; - } - if (i == vecDim) { - d = samplesCommon::divUp(d, comps); - } - maxNbElems = std::max(maxNbElems, d * strides.d[i]); - } - return static_cast(maxNbElems) * batch * (vecDim < 0 ? 1 : comps); -} - -inline int64_t volume(nvinfer1::Dims dims, int vecDim, int comps, int batch) { - if (vecDim != -1) { - dims.d[vecDim] = roundUp(dims.d[vecDim], comps); - } - return volume(dims) * std::max(batch, 1); -} - -inline nvinfer1::Dims toDims(const std::vector& vec) { - int limit = static_cast(nvinfer1::Dims::MAX_DIMS); - if (static_cast(vec.size()) > limit) { - sample::gLogWarning - << "Vector too long, only first 8 elements are used in dimension." - << std::endl; - } - // Pick first nvinfer1::Dims::MAX_DIMS elements - nvinfer1::Dims dims{std::min(static_cast(vec.size()), limit), {}}; - std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); - return dims; -} - -template -inline void fillBuffer(void* buffer, int64_t volume, T min, T max) { - T* typedBuffer = static_cast(buffer); - std::default_random_engine engine; - if (std::is_integral::value) { - std::uniform_int_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { - return static_cast(distribution(engine)); - }; - std::generate(typedBuffer, typedBuffer + volume, generator); - } else { - std::uniform_real_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { - return static_cast(distribution(engine)); - }; - std::generate(typedBuffer, typedBuffer + volume, generator); - } -} - -// Specialization needed for custom type __half -template -inline void fillBufferHalf(void* buffer, int64_t volume, H min, H max) { - H* typedBuffer = static_cast(buffer); - std::default_random_engine engine; - std::uniform_real_distribution distribution(min, max); - auto generator = [&engine, &distribution]() { - return static_cast(distribution(engine)); - }; - std::generate(typedBuffer, typedBuffer + volume, generator); -} -template <> -inline void fillBuffer<__half>(void* buffer, int64_t volume, __half min, - __half max) { - fillBufferHalf(buffer, volume, min, max); -} - -template -inline void dumpBuffer(const void* buffer, const std::string& separator, - std::ostream& os, const Dims& dims, const Dims& strides, - int32_t vectorDim, int32_t spv) { - const int64_t volume = std::accumulate(dims.d, dims.d + dims.nbDims, 1, - std::multiplies()); - const T* typedBuffer = static_cast(buffer); - std::string sep; - for (int64_t v = 0; v < volume; ++v) { - int64_t curV = v; - int32_t dataOffset = 0; - for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) { - int32_t dimVal = curV % dims.d[dimIndex]; - if (dimIndex == vectorDim) { - dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; - } else { - dataOffset += - dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv); - } - curV /= dims.d[dimIndex]; - ASSERT(curV >= 0); - } - - os << sep << typedBuffer[dataOffset]; - sep = separator; - } -} - -inline void loadFromFile(std::string const& fileName, char* dst, size_t size) { - ASSERT(dst); - - std::ifstream file(fileName, std::ios::in | std::ios::binary); - if (file.is_open()) { - file.read(dst, size); - file.close(); - } else { - std::stringstream msg; - msg << "Cannot open file " << fileName << "!"; - throw std::invalid_argument(msg.str()); - } -} - -struct Binding { - bool isInput{false}; - std::unique_ptr buffer; - int64_t volume{0}; - nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT}; - - void fill(const std::string& fileName) { - loadFromFile(fileName, static_cast(buffer->getHostBuffer()), - buffer->getSize()); - } - - void fill() { - switch (dataType) { - case nvinfer1::DataType::kBOOL: { - fillBuffer(buffer->getHostBuffer(), volume, 0, 1); - break; - } - case nvinfer1::DataType::kINT32: { - fillBuffer(buffer->getHostBuffer(), volume, -128, 127); - break; - } - case nvinfer1::DataType::kINT8: { - fillBuffer(buffer->getHostBuffer(), volume, -128, 127); - break; - } - case nvinfer1::DataType::kFLOAT: { - fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); - break; - } - case nvinfer1::DataType::kHALF: { - fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F); - break; - } - } - } - - void dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, - int32_t spv, const std::string separator = " ") const { - switch (dataType) { - case nvinfer1::DataType::kBOOL: { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, - vectorDim, spv); - break; - } - case nvinfer1::DataType::kINT32: { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, - vectorDim, spv); - break; - } - case nvinfer1::DataType::kINT8: { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, - vectorDim, spv); - break; - } - case nvinfer1::DataType::kFLOAT: { - dumpBuffer(buffer->getHostBuffer(), separator, os, dims, strides, - vectorDim, spv); - break; - } - case nvinfer1::DataType::kHALF: { - dumpBuffer<__half>(buffer->getHostBuffer(), separator, os, dims, strides, - vectorDim, spv); - break; - } - } - } -}; - -class Bindings { - public: - Bindings() = delete; - explicit Bindings(bool useManaged) : mUseManaged(useManaged) {} - - void addBinding(int b, const std::string& name, bool isInput, int64_t volume, - nvinfer1::DataType dataType, - const std::string& fileName = "") { - while (mBindings.size() <= static_cast(b)) { - mBindings.emplace_back(); - mDevicePointers.emplace_back(); - } - mNames[name] = b; - if (mBindings[b].buffer == nullptr) { - if (mUseManaged) { - mBindings[b].buffer.reset(new UnifiedMirroredBuffer); - } else { - mBindings[b].buffer.reset(new DiscreteMirroredBuffer); - } - } - mBindings[b].isInput = isInput; - // Some memory allocators return nullptr when allocating zero bytes, but - // TensorRT requires a non-null ptr - // even for empty tensors, so allocate a dummy byte. - if (volume == 0) { - mBindings[b].buffer->allocate(1); - } else { - mBindings[b].buffer->allocate( - static_cast(volume) * - static_cast(dataTypeSize(dataType))); - } - mBindings[b].volume = volume; - mBindings[b].dataType = dataType; - mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer(); - if (isInput) { - if (fileName.empty()) { - fill(b); - } else { - fill(b, fileName); - } - } - } - - void** getDeviceBuffers() { return mDevicePointers.data(); } - - void transferInputToDevice(TrtCudaStream& stream) { - for (auto& b : mNames) { - if (mBindings[b.second].isInput) { - mBindings[b.second].buffer->hostToDevice(stream); - } - } - } - - void transferOutputToHost(TrtCudaStream& stream) { - for (auto& b : mNames) { - if (!mBindings[b.second].isInput) { - mBindings[b.second].buffer->deviceToHost(stream); - } - } - } - - void fill(int binding, const std::string& fileName) { - mBindings[binding].fill(fileName); - } - - void fill(int binding) { mBindings[binding].fill(); } - - void dumpBindingDimensions(int binding, - const nvinfer1::IExecutionContext& context, - std::ostream& os) const { - const auto dims = context.getBindingDimensions(binding); - // Do not add a newline terminator, because the caller may be outputting a - // JSON string. - os << dims; - } - - void dumpBindingValues(const nvinfer1::IExecutionContext& context, - int binding, std::ostream& os, - const std::string& separator = " ", - int32_t batch = 1) const { - Dims dims = context.getBindingDimensions(binding); - Dims strides = context.getStrides(binding); - int32_t vectorDim = context.getEngine().getBindingVectorizedDim(binding); - const int32_t spv = - context.getEngine().getBindingComponentsPerElement(binding); - - if (context.getEngine().hasImplicitBatchDimension()) { - auto insertN = [](Dims& d, int32_t bs) { - const int32_t nbDims = d.nbDims; - ASSERT(nbDims < Dims::MAX_DIMS); - std::copy_backward(&d.d[0], &d.d[nbDims], &d.d[nbDims + 1]); - d.d[0] = bs; - d.nbDims = nbDims + 1; - }; - int32_t batchStride = 0; - for (int32_t i = 0; i < strides.nbDims; ++i) { - if (strides.d[i] * dims.d[i] > batchStride) { - batchStride = strides.d[i] * dims.d[i]; - } - } - insertN(dims, batch); - insertN(strides, batchStride); - vectorDim = (vectorDim == -1) ? -1 : vectorDim + 1; - } - - mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator); - } - - void dumpInputs(const nvinfer1::IExecutionContext& context, - std::ostream& os) const { - auto isInput = [](const Binding& b) { return b.isInput; }; - dumpBindings(context, isInput, os); - } - - void dumpOutputs(const nvinfer1::IExecutionContext& context, - std::ostream& os) const { - auto isOutput = [](const Binding& b) { return !b.isInput; }; - dumpBindings(context, isOutput, os); - } - - void dumpBindings(const nvinfer1::IExecutionContext& context, - std::ostream& os) const { - auto all = [](const Binding& b) { return true; }; - dumpBindings(context, all, os); - } - - void dumpBindings(const nvinfer1::IExecutionContext& context, - bool (*predicate)(const Binding& b), - std::ostream& os) const { - for (const auto& n : mNames) { - const auto binding = n.second; - if (predicate(mBindings[binding])) { - os << n.first << ": ("; - dumpBindingDimensions(binding, context, os); - os << ")" << std::endl; - - dumpBindingValues(context, binding, os); - os << std::endl; - } - } - } - - std::unordered_map getInputBindings() const { - auto isInput = [](const Binding& b) { return b.isInput; }; - return getBindings(isInput); - } - - std::unordered_map getOutputBindings() const { - auto isOutput = [](const Binding& b) { return !b.isInput; }; - return getBindings(isOutput); - } - - std::unordered_map getBindings() const { - auto all = [](const Binding& b) { return true; }; - return getBindings(all); - } - - std::unordered_map - getBindings(bool (*predicate)(const Binding& b)) const { - std::unordered_map bindings; - for (const auto& n : mNames) { - const auto binding = n.second; - if (predicate(mBindings[binding])) { - bindings.insert(n); - } - } - return bindings; - } - - private: - std::unordered_map mNames; - std::vector mBindings; - std::vector mDevicePointers; - bool mUseManaged{false}; -}; - -template struct TrtDestroyer { - void operator()(T* t) { t->destroy(); } -}; - -template using TrtUniquePtr = std::unique_ptr>; - -inline bool broadcastIOFormats(const std::vector& formats, - size_t nbBindings, bool isInput = true) { - bool broadcast = formats.size() == 1; - bool validFormatsCount = broadcast || (formats.size() == nbBindings); - if (!formats.empty() && !validFormatsCount) { - if (isInput) { - throw std::invalid_argument( - "The number of inputIOFormats must match network's inputs or be one " - "for broadcasting."); - } else { - throw std::invalid_argument( - "The number of outputIOFormats must match network's outputs or be " - "one for broadcasting."); - } - } - return broadcast; -} - -inline std::vector loadTimingCacheFile(const std::string inFileName) { - std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); - if (!iFile) { - sample::gLogWarning << "Could not read timing cache from: " << inFileName - << ". A new timing cache will be generated and written." - << std::endl; - return std::vector(); - } - iFile.seekg(0, std::ifstream::end); - size_t fsize = iFile.tellg(); - iFile.seekg(0, std::ifstream::beg); - std::vector content(fsize); - iFile.read(content.data(), fsize); - iFile.close(); - sample::gLogInfo << "Loaded " << fsize << " bytes of timing cache from " - << inFileName << std::endl; - return content; -} - -inline void saveTimingCacheFile(const std::string outFileName, - const IHostMemory* blob) { - std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); - if (!oFile) { - sample::gLogWarning << "Could not write timing cache to: " << outFileName - << std::endl; - return; - } - oFile.write((char*)blob->data(), blob->size()); - oFile.close(); - sample::gLogInfo << "Saved " << blob->size() << " bytes of timing cache to " - << outFileName << std::endl; -} - -inline int32_t getCudaDriverVersion() { - int32_t version{-1}; - cudaCheck(cudaDriverGetVersion(&version)); - return version; -} - -inline int32_t getCudaRuntimeVersion() { - int32_t version{-1}; - cudaCheck(cudaRuntimeGetVersion(&version)); - return version; -} - -} // namespace sample - -#endif // TRT_SAMPLE_UTILS_H diff --git a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c b/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c deleted file mode 100644 index 515a55bb1..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.c +++ /dev/null @@ -1,568 +0,0 @@ -/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */ -/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */ - -/* - * Copyright (c) 2002 Todd C. Miller - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Sponsored in part by the Defense Advanced Research Projects - * Agency (DARPA) and Air Force Research Laboratory, Air Force - * Materiel Command, USAF, under agreement number F39502-99-1-0512. - */ -/*- - * Copyright (c) 2000 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Dieter Baron and Thomas Klausner. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ - -#ifdef REPLACE_GETOPT -int opterr = 1; /* if error message should be printed */ -int optind = 1; /* index into parent argv vector */ -int optopt = '?'; /* character checked for validity */ -#undef optreset /* see getopt.h */ -#define optreset __mingw_optreset -int optreset; /* reset getopt */ -char* optarg; /* argument associated with option */ -#endif - -#define PRINT_ERROR ((opterr) && (*options != ':')) - -#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ -#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ -#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ - -/* return values */ -#define BADCH (int) '?' -#define BADARG ((*options == ':') ? (int) ':' : (int) '?') -#define INORDER (int) 1 - -#ifndef __CYGWIN__ -#define __progname __argv[0] -#else -extern char __declspec(dllimport) * __progname; -#endif - -#ifdef __CYGWIN__ -static char EMSG[] = ""; -#else -#define EMSG "" -#endif - -static int getopt_internal(int, char* const*, const char*, const struct option*, int*, int); -static int parse_long_options(char* const*, const char*, const struct option*, int*, int); -static int gcd(int, int); -static void permute_args(int, int, int, char* const*); - -static char* place = EMSG; /* option letter processing */ - -/* XXX: set optreset to 1 rather than these two */ -static int nonopt_start = -1; /* first non option argument (for permute) */ -static int nonopt_end = -1; /* first option after non options (for permute) */ - -/* Error messages */ -static const char recargchar[] = "option requires an argument -- %c"; -static const char recargstring[] = "option requires an argument -- %s"; -static const char ambig[] = "ambiguous option -- %.*s"; -static const char noarg[] = "option doesn't take an argument -- %.*s"; -static const char illoptchar[] = "unknown option -- %c"; -static const char illoptstring[] = "unknown option -- %s"; - -static void _vwarnx(const char* fmt, va_list ap) -{ - (void) fprintf(stderr, "%s: ", __progname); - if (fmt != NULL) - (void) vfprintf(stderr, fmt, ap); - (void) fprintf(stderr, "\n"); -} - -static void warnx(const char* fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - _vwarnx(fmt, ap); - va_end(ap); -} - -/* - * Compute the greatest common divisor of a and b. - */ -static int gcd(int a, int b) -{ - int c; - - c = a % b; - while (c != 0) - { - a = b; - b = c; - c = a % b; - } - - return (b); -} - -/* - * Exchange the block from nonopt_start to nonopt_end with the block - * from nonopt_end to opt_end (keeping the same order of arguments - * in each block). - */ -static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv) -{ - int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos; - char* swap; - - /* - * compute lengths of blocks and number and size of cycles - */ - nnonopts = panonopt_end - panonopt_start; - nopts = opt_end - panonopt_end; - ncycle = gcd(nnonopts, nopts); - cyclelen = (opt_end - panonopt_start) / ncycle; - - for (i = 0; i < ncycle; i++) - { - cstart = panonopt_end + i; - pos = cstart; - for (j = 0; j < cyclelen; j++) - { - if (pos >= panonopt_end) - pos -= nnonopts; - else - pos += nopts; - swap = nargv[pos]; - /* LINTED const cast */ - ((char**) nargv)[pos] = nargv[cstart]; - /* LINTED const cast */ - ((char**) nargv)[cstart] = swap; - } - } -} - -/* - * parse_long_options -- - * Parse long options in argc/argv argument vector. - * Returns -1 if short_too is set and the option does not match long_options. - */ -static int parse_long_options( - char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too) -{ - char *current_argv, *has_equal; - size_t current_argv_len; - int i, ambiguous, match; - -#define IDENTICAL_INTERPRETATION(_x, _y) \ - (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \ - && long_options[(_x)].val == long_options[(_y)].val) - - current_argv = place; - match = -1; - ambiguous = 0; - - optind++; - - if ((has_equal = strchr(current_argv, '=')) != NULL) - { - /* argument found (--option=arg) */ - current_argv_len = has_equal - current_argv; - has_equal++; - } - else - current_argv_len = strlen(current_argv); - - for (i = 0; long_options[i].name; i++) - { - /* find matching long option */ - if (strncmp(current_argv, long_options[i].name, current_argv_len)) - continue; - - if (strlen(long_options[i].name) == current_argv_len) - { - /* exact match */ - match = i; - ambiguous = 0; - break; - } - /* - * If this is a known short option, don't allow - * a partial match of a single character. - */ - if (short_too && current_argv_len == 1) - continue; - - if (match == -1) /* partial match */ - match = i; - else if (!IDENTICAL_INTERPRETATION(i, match)) - ambiguous = 1; - } - if (ambiguous) - { - /* ambiguous abbreviation */ - if (PRINT_ERROR) - warnx(ambig, (int) current_argv_len, current_argv); - optopt = 0; - return (BADCH); - } - if (match != -1) - { /* option found */ - if (long_options[match].has_arg == no_argument && has_equal) - { - if (PRINT_ERROR) - warnx(noarg, (int) current_argv_len, current_argv); - /* - * XXX: GNU sets optopt to val regardless of flag - */ - if (long_options[match].flag == NULL) - optopt = long_options[match].val; - else - optopt = 0; - return (BADARG); - } - if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument) - { - if (has_equal) - optarg = has_equal; - else if (long_options[match].has_arg == required_argument) - { - /* - * optional argument doesn't use next nargv - */ - optarg = nargv[optind++]; - } - } - if ((long_options[match].has_arg == required_argument) && (optarg == NULL)) - { - /* - * Missing argument; leading ':' indicates no error - * should be generated. - */ - if (PRINT_ERROR) - warnx(recargstring, current_argv); - /* - * XXX: GNU sets optopt to val regardless of flag - */ - if (long_options[match].flag == NULL) - optopt = long_options[match].val; - else - optopt = 0; - --optind; - return (BADARG); - } - } - else - { /* unknown option */ - if (short_too) - { - --optind; - return (-1); - } - if (PRINT_ERROR) - warnx(illoptstring, current_argv); - optopt = 0; - return (BADCH); - } - if (idx) - *idx = match; - if (long_options[match].flag) - { - *long_options[match].flag = long_options[match].val; - return (0); - } - else - return (long_options[match].val); -#undef IDENTICAL_INTERPRETATION -} - -/* - * getopt_internal -- - * Parse argc/argv argument vector. Called by user level routines. - */ -static int getopt_internal( - int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags) -{ - const char* oli; /* option letter list index */ - int optchar, short_too; - static int posixly_correct = -1; - - if (options == NULL) - return (-1); - - /* - * XXX Some GNU programs (like cvs) set optind to 0 instead of - * XXX using optreset. Work around this braindamage. - */ - if (optind == 0) - optind = optreset = 1; - - /* - * Disable GNU extensions if POSIXLY_CORRECT is set or options - * string begins with a '+'. - * - * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or - * optreset != 0 for GNU compatibility. - */ - if (posixly_correct == -1 || optreset != 0) - posixly_correct = (getenv("POSIXLY_CORRECT") != NULL); - if (*options == '-') - flags |= FLAG_ALLARGS; - else if (posixly_correct || *options == '+') - flags &= ~FLAG_PERMUTE; - if (*options == '+' || *options == '-') - options++; - - optarg = NULL; - if (optreset) - nonopt_start = nonopt_end = -1; -start: - if (optreset || !*place) - { /* update scanning pointer */ - optreset = 0; - if (optind >= nargc) - { /* end of argument vector */ - place = EMSG; - if (nonopt_end != -1) - { - /* do permutation, if we have to */ - permute_args(nonopt_start, nonopt_end, optind, nargv); - optind -= nonopt_end - nonopt_start; - } - else if (nonopt_start != -1) - { - /* - * If we skipped non-options, set optind - * to the first of them. - */ - optind = nonopt_start; - } - nonopt_start = nonopt_end = -1; - return (-1); - } - if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL)) - { - place = EMSG; /* found non-option */ - if (flags & FLAG_ALLARGS) - { - /* - * GNU extension: - * return non-option as argument to option 1 - */ - optarg = nargv[optind++]; - return (INORDER); - } - if (!(flags & FLAG_PERMUTE)) - { - /* - * If no permutation wanted, stop parsing - * at first non-option. - */ - return (-1); - } - /* do permutation */ - if (nonopt_start == -1) - nonopt_start = optind; - else if (nonopt_end != -1) - { - permute_args(nonopt_start, nonopt_end, optind, nargv); - nonopt_start = optind - (nonopt_end - nonopt_start); - nonopt_end = -1; - } - optind++; - /* process next argument */ - goto start; - } - if (nonopt_start != -1 && nonopt_end == -1) - nonopt_end = optind; - - /* - * If we have "-" do nothing, if "--" we are done. - */ - if (place[1] != '\0' && *++place == '-' && place[1] == '\0') - { - optind++; - place = EMSG; - /* - * We found an option (--), so if we skipped - * non-options, we have to permute. - */ - if (nonopt_end != -1) - { - permute_args(nonopt_start, nonopt_end, optind, nargv); - optind -= nonopt_end - nonopt_start; - } - nonopt_start = nonopt_end = -1; - return (-1); - } - } - - /* - * Check long options if: - * 1) we were passed some - * 2) the arg is not just "-" - * 3) either the arg starts with -- we are getopt_long_only() - */ - if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY))) - { - short_too = 0; - if (*place == '-') - place++; /* --foo long option */ - else if (*place != ':' && strchr(options, *place) != NULL) - short_too = 1; /* could be short option too */ - - optchar = parse_long_options(nargv, options, long_options, idx, short_too); - if (optchar != -1) - { - place = EMSG; - return (optchar); - } - } - - if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0') - || (oli = strchr(options, optchar)) == NULL) - { - /* - * If the user specified "-" and '-' isn't listed in - * options, return -1 (non-option) as per POSIX. - * Otherwise, it is an unknown option character (or ':'). - */ - if (optchar == (int) '-' && *place == '\0') - return (-1); - if (!*place) - ++optind; - if (PRINT_ERROR) - warnx(illoptchar, optchar); - optopt = optchar; - return (BADCH); - } - if (long_options != NULL && optchar == 'W' && oli[1] == ';') - { - /* -W long-option */ - if (*place) /* no space */ - /* NOTHING */; - else if (++optind >= nargc) - { /* no arg */ - place = EMSG; - if (PRINT_ERROR) - warnx(recargchar, optchar); - optopt = optchar; - return (BADARG); - } - else /* white space */ - place = nargv[optind]; - optchar = parse_long_options(nargv, options, long_options, idx, 0); - place = EMSG; - return (optchar); - } - if (*++oli != ':') - { /* doesn't take argument */ - if (!*place) - ++optind; - } - else - { /* takes (optional) argument */ - optarg = NULL; - if (*place) /* no white space */ - optarg = place; - else if (oli[1] != ':') - { /* arg not optional */ - if (++optind >= nargc) - { /* no arg */ - place = EMSG; - if (PRINT_ERROR) - warnx(recargchar, optchar); - optopt = optchar; - return (BADARG); - } - else - optarg = nargv[optind]; - } - place = EMSG; - ++optind; - } - /* dump back option letter */ - return (optchar); -} - -#ifdef REPLACE_GETOPT -/* - * getopt -- - * Parse argc/argv argument vector. - * - * [eventually this will replace the BSD getopt] - */ -int getopt(int nargc, char* const* nargv, const char* options) -{ - - /* - * We don't pass FLAG_PERMUTE to getopt_internal() since - * the BSD getopt(3) (unlike GNU) has never done this. - * - * Furthermore, since many privileged programs call getopt() - * before dropping privileges it makes sense to keep things - * as simple (and bug-free) as possible. - */ - return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); -} -#endif /* REPLACE_GETOPT */ - -/* - * getopt_long -- - * Parse argc/argv argument vector. - */ -int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx) -{ - - return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE)); -} - -/* - * getopt_long_only -- - * Parse argc/argv argument vector. - */ -int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx) -{ - - return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY)); -} diff --git a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h b/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h deleted file mode 100644 index baa1d61b5..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/common/windows/getopt.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __GETOPT_H__ -/** - * DISCLAIMER - * This file has no copyright assigned and is placed in the Public Domain. - * This file is a part of the w64 mingw-runtime package. - * - * The w64 mingw-runtime package and its code is distributed in the hope that it - * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR - * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to - * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ - -#define __GETOPT_H__ - -/* All the headers include this file. */ -#include - -#if defined(WINGETOPT_SHARED_LIB) -#if defined(BUILDING_WINGETOPT_DLL) -#define WINGETOPT_API __declspec(dllexport) -#else -#define WINGETOPT_API __declspec(dllimport) -#endif -#else -#define WINGETOPT_API -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - - WINGETOPT_API extern int optind; /* index of first non-option in argv */ - WINGETOPT_API extern int optopt; /* single option character, as parsed */ - WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */ - /* (user may set to zero, to suppress) */ - - WINGETOPT_API extern char* optarg; /* pointer to argument of current option */ - - extern int getopt(int nargc, char* const* nargv, const char* options); - -#ifdef _BSD_SOURCE -/* - * BSD adds the non-standard `optreset' feature, for reinitialisation - * of `getopt' parsing. We support this feature, for applications which - * proclaim their BSD heritage, before including this header; however, - * to maintain portability, developers are advised to avoid it. - */ -#define optreset __mingw_optreset - extern int optreset; -#endif -#ifdef __cplusplus -} -#endif -/* - * POSIX requires the `getopt' API to be specified in `unistd.h'; - * thus, `unistd.h' includes this header. However, we do not want - * to expose the `getopt_long' or `getopt_long_only' APIs, when - * included in this manner. Thus, close the standard __GETOPT_H__ - * declarations block, and open an additional __GETOPT_LONG_H__ - * specific block, only when *not* __UNISTD_H_SOURCED__, in which - * to declare the extended API. - */ -#endif /* !defined(__GETOPT_H__) */ - -#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) -#define __GETOPT_LONG_H__ - -#ifdef __cplusplus -extern "C" -{ -#endif - - struct option /* specification for a long form option... */ - { - const char* name; /* option name, without leading hyphens */ - int has_arg; /* does it take an argument? */ - int* flag; /* where to save its status, or NULL */ - int val; /* its associated status value */ - }; - - enum /* permitted values for its `has_arg' field... */ - { - no_argument = 0, /* option never takes an argument */ - required_argument, /* option always requires an argument */ - optional_argument /* option may take an argument */ - }; - - extern int getopt_long( - int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx); - extern int getopt_long_only( - int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx); -/* - * Previous MinGW implementation had... - */ -#ifndef HAVE_DECL_GETOPT -/* - * ...for the long form API only; keep this for compatibility. - */ -#define HAVE_DECL_GETOPT 1 -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ diff --git a/csrcs/fastdeploy/backends/tensorrt/trt_backend.cc b/csrcs/fastdeploy/backends/tensorrt/trt_backend.cc deleted file mode 100644 index dd3f837d9..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/trt_backend.cc +++ /dev/null @@ -1,528 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/backends/tensorrt/trt_backend.h" -#include "fastdeploy/utils/utils.h" -#ifdef ENABLE_PADDLE_FRONTEND -#include "paddle2onnx/converter.h" -#endif - -namespace fastdeploy { -size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) { - if (dtype == nvinfer1::DataType::kFLOAT) { - return sizeof(float); - } else if (dtype == nvinfer1::DataType::kHALF) { - return sizeof(float) / 2; - } else if (dtype == nvinfer1::DataType::kINT8) { - return sizeof(int8_t); - } else if (dtype == nvinfer1::DataType::kINT32) { - return sizeof(int32_t); - } - // kBOOL - return sizeof(bool); -} - -FDDataType GetFDDataType(const nvinfer1::DataType& dtype) { - if (dtype == nvinfer1::DataType::kFLOAT) { - return FDDataType::FP32; - } else if (dtype == nvinfer1::DataType::kHALF) { - return FDDataType::FP16; - } else if (dtype == nvinfer1::DataType::kINT8) { - return FDDataType::INT8; - } else if (dtype == nvinfer1::DataType::kINT32) { - return FDDataType::INT32; - } - // kBOOL - return FDDataType::BOOL; -} - -std::vector toVec(const nvinfer1::Dims& dim) { - std::vector out(dim.d, dim.d + dim.nbDims); - return out; -} - -bool CheckDynamicShapeConfig(const paddle2onnx::OnnxReader& reader, - const TrtBackendOption& option) { - // paddle2onnx::ModelTensorInfo inputs[reader.NumInputs()]; - // std::string input_shapes[reader.NumInputs()]; - std::vector inputs(reader.NumInputs()); - std::vector input_shapes(reader.NumInputs()); - for (int i = 0; i < reader.NumInputs(); ++i) { - reader.GetInputInfo(i, &inputs[i]); - - // change 0 to -1, when input_dim is a string, onnx will make it to zero - for (int j = 0; j < inputs[i].rank; ++j) { - if (inputs[i].shape[j] <= 0) { - inputs[i].shape[j] = -1; - } - } - - input_shapes[i] = ""; - for (int j = 0; j < inputs[i].rank; ++j) { - if (j != inputs[i].rank - 1) { - input_shapes[i] += (std::to_string(inputs[i].shape[j]) + ", "); - } else { - input_shapes[i] += std::to_string(inputs[i].shape[j]); - } - } - } - - bool all_check_passed = true; - for (int i = 0; i < reader.NumInputs(); ++i) { - bool contain_unknown_dim = false; - for (int j = 0; j < inputs[i].rank; ++j) { - if (inputs[i].shape[j] < 0) { - contain_unknown_dim = true; - } - } - - std::string name(inputs[i].name, strlen(inputs[i].name)); - FDINFO << "The loaded model's input tensor:" << name - << " has shape [" + input_shapes[i] << "]." << std::endl; - if (contain_unknown_dim) { - auto iter1 = option.min_shape.find(name); - auto iter2 = option.max_shape.find(name); - auto iter3 = option.opt_shape.find(name); - if (iter1 == option.min_shape.end() || iter2 == option.max_shape.end() || - iter3 == option.opt_shape.end()) { - FDERROR << "The loaded model's input tensor:" << name - << " has dynamic shape [" + input_shapes[i] + - "], but didn't configure it's shape for tensorrt with " - "SetTrtInputShape correctly." - << std::endl; - all_check_passed = false; - } - } - } - - return all_check_passed; -} - -bool TrtBackend::InitFromTrt(const std::string& trt_engine_file, - const TrtBackendOption& option) { - if (initialized_) { - FDERROR << "TrtBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - cudaSetDevice(option.gpu_id); - - std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in); - if (!fin) { - FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file - << std::endl; - return false; - } - fin.seekg(0, std::ios::end); - std::string engine_buffer; - engine_buffer.resize(fin.tellg()); - fin.seekg(0, std::ios::beg); - fin.read(&(engine_buffer.at(0)), engine_buffer.size()); - fin.close(); - SampleUniquePtr runtime{ - createInferRuntime(sample::gLogger.getTRTLogger())}; - if (!runtime) { - FDERROR << "Failed to call createInferRuntime()." << std::endl; - return false; - } - engine_ = std::shared_ptr( - runtime->deserializeCudaEngine(engine_buffer.data(), - engine_buffer.size()), - samplesCommon::InferDeleter()); - if (!engine_) { - FDERROR << "Failed to call deserializeCudaEngine()." << std::endl; - return false; - } - - context_ = std::shared_ptr( - engine_->createExecutionContext()); - FDASSERT(cudaStreamCreate(&stream_) == 0, - "[ERROR] Error occurs while calling cudaStreamCreate()."); - GetInputOutputInfo(); - initialized_ = true; - return true; -} - -bool TrtBackend::InitFromPaddle(const std::string& model_file, - const std::string& params_file, - const TrtBackendOption& option, bool verbose) { - if (initialized_) { - FDERROR << "TrtBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - -#ifdef ENABLE_PADDLE_FRONTEND - std::vector custom_ops; - for (auto& item : option.custom_op_info_) { - paddle2onnx::CustomOp op; - std::strcpy(op.op_name, item.first.c_str()); - std::strcpy(op.export_op_name, item.second.c_str()); - custom_ops.emplace_back(op); - } - char* model_content_ptr; - int model_content_size = 0; - if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), - &model_content_ptr, &model_content_size, 11, true, - verbose, true, true, true, custom_ops.data(), - custom_ops.size())) { - FDERROR << "Error occured while export PaddlePaddle to ONNX format." - << std::endl; - return false; - } - - if (option.remove_multiclass_nms_) { - char* new_model = nullptr; - int new_model_size = 0; - if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size, - &new_model, &new_model_size)) { - FDERROR << "Try to remove MultiClassNMS failed." << std::endl; - return false; - } - delete[] model_content_ptr; - std::string onnx_model_proto(new_model, new_model + new_model_size); - delete[] new_model; - return InitFromOnnx(onnx_model_proto, option, true); - } - - std::string onnx_model_proto(model_content_ptr, - model_content_ptr + model_content_size); - delete[] model_content_ptr; - model_content_ptr = nullptr; - return InitFromOnnx(onnx_model_proto, option, true); -#else - FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to " - "call `InitFromOnnx` instead." - << std::endl; - return false; -#endif -} - -bool TrtBackend::InitFromOnnx(const std::string& model_file, - const TrtBackendOption& option, - bool from_memory_buffer) { - if (initialized_) { - FDERROR << "TrtBackend is already initlized, cannot initialize again." - << std::endl; - return false; - } - cudaSetDevice(option.gpu_id); - - std::string onnx_content = ""; - if (!from_memory_buffer) { - std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in); - if (!fin) { - FDERROR << "[ERROR] Failed to open ONNX model file: " << model_file - << std::endl; - return false; - } - fin.seekg(0, std::ios::end); - onnx_content.resize(fin.tellg()); - fin.seekg(0, std::ios::beg); - fin.read(&(onnx_content.at(0)), onnx_content.size()); - fin.close(); - } else { - onnx_content = model_file; - } - - // This part of code will record the original outputs order - // because the converted tensorrt network may exist wrong order of outputs - outputs_order_.clear(); - auto onnx_reader = - paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size()); - for (int i = 0; i < onnx_reader.NumOutputs(); ++i) { - std::string name( - onnx_reader.output_names[i], - onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i])); - outputs_order_[name] = i; - } - if (!CheckDynamicShapeConfig(onnx_reader, option)) { - FDERROR << "TrtBackend::CheckDynamicShapeConfig failed." << std::endl; - return false; - } - - if (option.serialize_file != "") { - std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in); - if (fin) { - FDINFO << "Detect serialized TensorRT Engine file in " - << option.serialize_file << ", will load it directly." - << std::endl; - fin.close(); - return InitFromTrt(option.serialize_file); - } - } - - if (!CreateTrtEngine(onnx_content, option)) { - return false; - } - - context_ = std::shared_ptr( - engine_->createExecutionContext()); - FDASSERT(cudaStreamCreate(&stream_) == 0, - "[ERROR] Error occurs while calling cudaStreamCreate()."); - GetInputOutputInfo(); - initialized_ = true; - return true; -} - -bool TrtBackend::Infer(std::vector& inputs, - std::vector* outputs) { - AllocateBufferInDynamicShape(inputs, outputs); - std::vector input_binds(inputs.size()); - for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i].dtype == FDDataType::INT64) { - int64_t* data = static_cast(inputs[i].Data()); - std::vector casted_data(data, data + inputs[i].Numel()); - FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(), - static_cast(casted_data.data()), - inputs[i].Nbytes() / 2, cudaMemcpyHostToDevice, - stream_) == 0, - "[ERROR] Error occurs while copy memory from CPU to GPU."); - } else { - FDASSERT(cudaMemcpyAsync(inputs_buffer_[inputs[i].name].data(), - inputs[i].Data(), inputs[i].Nbytes(), - cudaMemcpyHostToDevice, stream_) == 0, - "[ERROR] Error occurs while copy memory from CPU to GPU."); - } - } - if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) { - FDERROR << "Failed to Infer with TensorRT." << std::endl; - return false; - } - for (size_t i = 0; i < outputs->size(); ++i) { - FDASSERT(cudaMemcpyAsync((*outputs)[i].Data(), - outputs_buffer_[(*outputs)[i].name].data(), - (*outputs)[i].Nbytes(), cudaMemcpyDeviceToHost, - stream_) == 0, - "[ERROR] Error occurs while copy memory from GPU to CPU."); - } - return true; -} - -void TrtBackend::GetInputOutputInfo() { - inputs_desc_.clear(); - outputs_desc_.clear(); - auto num_binds = engine_->getNbBindings(); - for (auto i = 0; i < num_binds; ++i) { - std::string name = std::string(engine_->getBindingName(i)); - auto shape = toVec(engine_->getBindingDimensions(i)); - auto dtype = engine_->getBindingDataType(i); - if (engine_->bindingIsInput(i)) { - inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype}); - inputs_buffer_[name] = DeviceBuffer(dtype); - } else { - outputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype}); - outputs_buffer_[name] = DeviceBuffer(dtype); - } - } - bindings_.resize(num_binds); -} - -void TrtBackend::AllocateBufferInDynamicShape( - const std::vector& inputs, std::vector* outputs) { - for (const auto& item : inputs) { - auto idx = engine_->getBindingIndex(item.name.c_str()); - std::vector shape(item.shape.begin(), item.shape.end()); - auto dims = sample::toDims(shape); - context_->setBindingDimensions(idx, dims); - if (item.Nbytes() > inputs_buffer_[item.name].nbBytes()) { - inputs_buffer_[item.name].resize(dims); - bindings_[idx] = inputs_buffer_[item.name].data(); - } - } - if (outputs->size() != outputs_desc_.size()) { - outputs->resize(outputs_desc_.size()); - } - for (size_t i = 0; i < outputs_desc_.size(); ++i) { - auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str()); - auto output_dims = context_->getBindingDimensions(idx); - - // find the original index of output - auto iter = outputs_order_.find(outputs_desc_[i].name); - FDASSERT(iter != outputs_order_.end(), - "Cannot find output:" + outputs_desc_[i].name + - " of tensorrt network from the original model."); - auto ori_idx = iter->second; - (*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype); - (*outputs)[ori_idx].shape.assign(output_dims.d, - output_dims.d + output_dims.nbDims); - (*outputs)[ori_idx].name = outputs_desc_[i].name; - (*outputs)[ori_idx].data.resize(volume(output_dims) * - TrtDataTypeSize(outputs_desc_[i].dtype)); - if ((*outputs)[ori_idx].Nbytes() > - outputs_buffer_[outputs_desc_[i].name].nbBytes()) { - outputs_buffer_[outputs_desc_[i].name].resize(output_dims); - bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data(); - } - } -} - -bool TrtBackend::CreateTrtEngine(const std::string& onnx_model, - const TrtBackendOption& option) { - const auto explicitBatch = - 1U << static_cast( - nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); - - builder_ = SampleUniquePtr( - nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); - if (!builder_) { - FDERROR << "Failed to call createInferBuilder()." << std::endl; - return false; - } - network_ = SampleUniquePtr( - builder_->createNetworkV2(explicitBatch)); - if (!network_) { - FDERROR << "Failed to call createNetworkV2()." << std::endl; - return false; - } - auto config = SampleUniquePtr( - builder_->createBuilderConfig()); - if (!config) { - FDERROR << "Failed to call createBuilderConfig()." << std::endl; - return false; - } - - if (option.enable_fp16) { - if (!builder_->platformHasFastFp16()) { - FDWARNING << "Detected FP16 is not supported in the current GPU, " - "will use FP32 instead." - << std::endl; - } else { - config->setFlag(nvinfer1::BuilderFlag::kFP16); - } - } - - parser_ = SampleUniquePtr( - nvonnxparser::createParser(*network_, sample::gLogger.getTRTLogger())); - if (!parser_) { - FDERROR << "Failed to call createParser()." << std::endl; - return false; - } - if (!parser_->parse(onnx_model.data(), onnx_model.size())) { - FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl; - return false; - } - - FDINFO << "Start to building TensorRT Engine..." << std::endl; - bool fp16 = builder_->platformHasFastFp16(); - builder_->setMaxBatchSize(option.max_batch_size); - - config->setMaxWorkspaceSize(option.max_workspace_size); - - if (option.max_shape.size() > 0) { - auto profile = builder_->createOptimizationProfile(); - FDASSERT(option.max_shape.size() == option.min_shape.size() && - option.min_shape.size() == option.opt_shape.size(), - "[TrtBackend] Size of max_shape/opt_shape/min_shape in " - "TrtBackendOption should keep same."); - for (const auto& item : option.min_shape) { - // set min shape - FDASSERT(profile->setDimensions(item.first.c_str(), - nvinfer1::OptProfileSelector::kMIN, - sample::toDims(item.second)), - "[TrtBackend] Failed to set min_shape for input: " + item.first + - " in TrtBackend."); - - // set optimization shape - auto iter = option.opt_shape.find(item.first); - FDASSERT(iter != option.opt_shape.end(), - "[TrtBackend] Cannot find input name: " + item.first + - " in TrtBackendOption::opt_shape."); - FDASSERT(profile->setDimensions(item.first.c_str(), - nvinfer1::OptProfileSelector::kOPT, - sample::toDims(iter->second)), - "[TrtBackend] Failed to set opt_shape for input: " + item.first + - " in TrtBackend."); - // set max shape - iter = option.max_shape.find(item.first); - FDASSERT(iter != option.max_shape.end(), - "[TrtBackend] Cannot find input name: " + item.first + - " in TrtBackendOption::max_shape."); - FDASSERT(profile->setDimensions(item.first.c_str(), - nvinfer1::OptProfileSelector::kMAX, - sample::toDims(iter->second)), - "[TrtBackend] Failed to set max_shape for input: " + item.first + - " in TrtBackend."); - } - config->addOptimizationProfile(profile); - } - - SampleUniquePtr plan{ - builder_->buildSerializedNetwork(*network_, *config)}; - if (!plan) { - FDERROR << "Failed to call buildSerializedNetwork()." << std::endl; - return false; - } - - SampleUniquePtr runtime{ - createInferRuntime(sample::gLogger.getTRTLogger())}; - if (!runtime) { - FDERROR << "Failed to call createInferRuntime()." << std::endl; - return false; - } - - engine_ = std::shared_ptr( - runtime->deserializeCudaEngine(plan->data(), plan->size()), - samplesCommon::InferDeleter()); - if (!engine_) { - FDERROR << "Failed to call deserializeCudaEngine()." << std::endl; - return false; - } - - FDINFO << "TensorRT Engine is built succussfully." << std::endl; - if (option.serialize_file != "") { - FDINFO << "Serialize TensorRTEngine to local file " << option.serialize_file - << "." << std::endl; - std::ofstream engine_file(option.serialize_file.c_str()); - if (!engine_file) { - FDERROR << "Failed to open " << option.serialize_file << " to write." - << std::endl; - return false; - } - engine_file.write(static_cast(plan->data()), plan->size()); - engine_file.close(); - FDINFO << "TensorRTEngine is serialized to local file " - << option.serialize_file - << ", we can load this model from the seralized engine " - "directly next time." - << std::endl; - } - return true; -} - -TensorInfo TrtBackend::GetInputInfo(int index) { - FDASSERT(index < NumInputs(), "The index:" + std::to_string(index) + - " should less than the number of inputs:" + - std::to_string(NumInputs()) + "."); - TensorInfo info; - info.name = inputs_desc_[index].name; - info.shape.assign(inputs_desc_[index].shape.begin(), - inputs_desc_[index].shape.end()); - info.dtype = GetFDDataType(inputs_desc_[index].dtype); - return info; -} - -TensorInfo TrtBackend::GetOutputInfo(int index) { - FDASSERT(index < NumOutputs(), - "The index:" + std::to_string(index) + - " should less than the number of outputs:" + - std::to_string(NumOutputs()) + "."); - TensorInfo info; - info.name = outputs_desc_[index].name; - info.shape.assign(outputs_desc_[index].shape.begin(), - outputs_desc_[index].shape.end()); - info.dtype = GetFDDataType(outputs_desc_[index].dtype); - return info; -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/backends/tensorrt/trt_backend.h b/csrcs/fastdeploy/backends/tensorrt/trt_backend.h deleted file mode 100644 index 376da241f..000000000 --- a/csrcs/fastdeploy/backends/tensorrt/trt_backend.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "fastdeploy/backends/backend.h" - -#include "fastdeploy/backends/tensorrt/common/argsParser.h" -#include "fastdeploy/backends/tensorrt/common/buffers.h" -#include "fastdeploy/backends/tensorrt/common/common.h" -#include "fastdeploy/backends/tensorrt/common/logger.h" -#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h" -#include "fastdeploy/backends/tensorrt/common/sampleUtils.h" - -#include -#include "NvInfer.h" - -namespace fastdeploy { -using namespace samplesCommon; - -struct TrtValueInfo { - std::string name; - std::vector shape; - nvinfer1::DataType dtype; -}; - -struct TrtBackendOption { - int gpu_id = 0; - bool enable_fp16 = false; - bool enable_int8 = false; - size_t max_batch_size = 32; - size_t max_workspace_size = 1 << 30; - std::map> max_shape; - std::map> min_shape; - std::map> opt_shape; - std::string serialize_file = ""; - - // inside parameter, maybe remove next version - bool remove_multiclass_nms_ = false; - std::map custom_op_info_; -}; - -std::vector toVec(const nvinfer1::Dims& dim); -size_t TrtDataTypeSize(const nvinfer1::DataType& dtype); -FDDataType GetFDDataType(const nvinfer1::DataType& dtype); - -class TrtBackend : public BaseBackend { - public: - TrtBackend() : engine_(nullptr), context_(nullptr) {} - virtual ~TrtBackend() = default; - void BuildOption(const TrtBackendOption& option); - - bool InitFromPaddle(const std::string& model_file, - const std::string& params_file, - const TrtBackendOption& option = TrtBackendOption(), - bool verbose = false); - bool InitFromOnnx(const std::string& model_file, - const TrtBackendOption& option = TrtBackendOption(), - bool from_memory_buffer = false); - bool InitFromTrt(const std::string& trt_engine_file, - const TrtBackendOption& option = TrtBackendOption()); - - bool Infer(std::vector& inputs, std::vector* outputs); - - int NumInputs() const { return inputs_desc_.size(); } - int NumOutputs() const { return outputs_desc_.size(); } - TensorInfo GetInputInfo(int index); - TensorInfo GetOutputInfo(int index); - - private: - std::shared_ptr engine_; - std::shared_ptr context_; - SampleUniquePtr parser_; - SampleUniquePtr builder_; - SampleUniquePtr network_; - cudaStream_t stream_{}; - std::vector bindings_; - std::vector inputs_desc_; - std::vector outputs_desc_; - std::map inputs_buffer_; - std::map outputs_buffer_; - - // Sometimes while the number of outputs > 1 - // the output order of tensorrt may not be same - // with the original onnx model - // So this parameter will record to origin outputs - // order, to help recover the rigt order - std::map outputs_order_; - - void GetInputOutputInfo(); - void AllocateBufferInDynamicShape(const std::vector& inputs, - std::vector* outputs); - bool CreateTrtEngine(const std::string& onnx_model, - const TrtBackendOption& option); -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/config.h.in b/csrcs/fastdeploy/core/config.h.in deleted file mode 100644 index 771392586..000000000 --- a/csrcs/fastdeploy/core/config.h.in +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#ifndef FASTDEPLOY_DEBUG -#cmakedefine FASTDEPLOY_DEBUG -#endif - -#ifndef FASTDEPLOY_LIB -#cmakedefine FASTDEPLOY_LIB -#endif - -#ifndef ENABLE_PADDLE_FRONTEND -#cmakedefine ENABLE_PADDLE_FRONTEND -#endif - -#ifndef ENABLE_ORT_BACKEND -#cmakedefine ENABLE_ORT_BACKEND -#endif - -#ifndef ENABLE_PADDLE_BACKEND -#cmakedefine ENABLE_PADDLE_BACKEND -#endif - -#ifndef WITH_GPU -#cmakedefine WITH_GPU -#endif - -#ifndef ENABLE_TRT_BACKEND -#cmakedefine ENABLE_TRT_BACKEND -#endif - -#ifndef ENABLE_VISION -#cmakedefine ENABLE_VISION -#endif - -#ifndef ENABLE_OPENCV_CUDA -#cmakedefine ENABLE_OPENCV_CUDA -#endif - -#ifndef ENABLE_VISION_VISUALIZE -#cmakedefine ENABLE_VISION_VISUALIZE -#endif diff --git a/csrcs/fastdeploy/core/fd_tensor.cc b/csrcs/fastdeploy/core/fd_tensor.cc deleted file mode 100644 index c6f7a4739..000000000 --- a/csrcs/fastdeploy/core/fd_tensor.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/core/fd_tensor.h" -#include "fastdeploy/utils/utils.h" - -#ifdef WITH_GPU -#include -#endif - -namespace fastdeploy { - -void* FDTensor::MutableData() { - if (external_data_ptr != nullptr) { - return external_data_ptr; - } - return data.data(); -} - -void* FDTensor::Data() { - if (external_data_ptr != nullptr) { - if (device == Device::GPU) { -#ifdef WITH_GPU - // need to copy cuda mem to cpu first - temporary_cpu_buffer.resize(Nbytes()); - FDASSERT(cudaMemcpy(temporary_cpu_buffer.data(), external_data_ptr, - Nbytes(), cudaMemcpyDeviceToHost) == 0, - "[ERROR] Error occurs while copy memory from GPU to CPU"); - return temporary_cpu_buffer.data(); -#else - FDASSERT(false, - "The FastDeploy didn't compile under -DWITH_GPU=ON, so this is " - "an unexpected problem happend."); -#endif - } else { - return external_data_ptr; - } - } - return data.data(); -} - -const void* FDTensor::Data() const { - if (external_data_ptr != nullptr) { - return external_data_ptr; - } - return data.data(); -} - -void FDTensor::SetExternalData(const std::vector& new_shape, - const FDDataType& data_type, void* data_buffer) { - dtype = data_type; - shape.assign(new_shape.begin(), new_shape.end()); - external_data_ptr = data_buffer; -} - -void FDTensor::Allocate(const std::vector& new_shape, - const FDDataType& data_type, - const std::string& tensor_name) { - dtype = data_type; - name = tensor_name; - shape.assign(new_shape.begin(), new_shape.end()); - int unit = FDDataTypeSize(data_type); - int total_size = - std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - data.resize(total_size * unit); -} - -int FDTensor::Nbytes() const { return Numel() * FDDataTypeSize(dtype); } - -int FDTensor::Numel() const { - return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); -} - -template -void CalculateStatisInfo(void* src_ptr, int size, double* mean, double* max, - double* min) { - T* ptr = static_cast(src_ptr); - *mean = 0; - *max = -99999999; - *min = 99999999; - for (int i = 0; i < size; ++i) { - if (*(ptr + i) > *max) { - *max = *(ptr + i); - } - if (*(ptr + i) < *min) { - *min = *(ptr + i); - } - *mean += *(ptr + i); - } - *mean = *mean / size; -} - -void FDTensor::PrintInfo(const std::string& prefix) { - double mean = 0; - double max = -99999999; - double min = 99999999; - if (dtype == FDDataType::FP32) { - CalculateStatisInfo(Data(), Numel(), &mean, &max, &min); - } else if (dtype == FDDataType::FP64) { - CalculateStatisInfo(Data(), Numel(), &mean, &max, &min); - } else if (dtype == FDDataType::INT8) { - CalculateStatisInfo(Data(), Numel(), &mean, &max, &min); - } else if (dtype == FDDataType::UINT8) { - CalculateStatisInfo(Data(), Numel(), &mean, &max, &min); - } else if (dtype == FDDataType::INT32) { - CalculateStatisInfo(Data(), Numel(), &mean, &max, &min); - } else if (dtype == FDDataType::INT64) { - CalculateStatisInfo(Data(), Numel(), &mean, &max, &min); - } else { - FDASSERT(false, - "PrintInfo function doesn't support current situation, maybe you " - "need enhance this function now.") - } - std::cout << prefix << ": shape="; - for (int i = 0; i < shape.size(); ++i) { - std::cout << shape[i] << " "; - } - std::cout << ", dtype=" << Str(dtype) << ", mean=" << mean << ", max=" << max - << ", min=" << min << std::endl; -} - -FDTensor::FDTensor(const std::string& tensor_name) { name = tensor_name; } -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_tensor.h b/csrcs/fastdeploy/core/fd_tensor.h deleted file mode 100644 index 84e8c7ff0..000000000 --- a/csrcs/fastdeploy/core/fd_tensor.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include -#include -#include - -#include "fastdeploy/core/fd_type.h" - -namespace fastdeploy { - -struct FASTDEPLOY_DECL FDTensor { - std::vector data; - std::vector shape; - std::string name = ""; - FDDataType dtype; - - // This use to skip memory copy step - // the external_data_ptr will point to the user allocated memory - // user has to maintain the memory, allocate and release - void* external_data_ptr = nullptr; - // The internal data will be on CPU - // Some times, the external data is on the GPU, and we are going to use - // GPU to inference the model - // so we can skip data transfer, which may improve the efficience - Device device = Device::CPU; - - // if the external data is not on CPU, we use this temporary buffer - // to transfer data to CPU at some cases we need to visit the - // other devices' data - std::vector temporary_cpu_buffer; - - // Get data buffer pointer - void* MutableData(); - - // Use this data to get the tensor data to process - // Since the most senario is process data in CPU - // this function weill return a pointer to cpu memory - // buffer. - // If the original data is on other device, the data - // will copy to cpu store in `temporary_cpu_buffer` - void* Data(); - - const void* Data() const; - - // Set user memory buffer for Tensor, the memory is managed by - // the user it self, but the Tensor will share the memory with user - // So take care with the user buffer - void SetExternalData(const std::vector& new_shape, - const FDDataType& data_type, void* data_buffer); - - // Initialize Tensor - // Include setting attribute for tensor - // and allocate cpu memory buffer - void Allocate(const std::vector& new_shape, - const FDDataType& data_type, - const std::string& tensor_name = ""); - - // Total size of tensor memory buffer in bytes - int Nbytes() const; - - // Total number of elements in this tensor - int Numel() const; - - // Debug function - // Use this function to print shape, dtype, mean, max, min - // prefix will also be printed as tag - void PrintInfo(const std::string& prefix = "TensorInfo: "); - - FDTensor() {} - explicit FDTensor(const std::string& tensor_name); -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_type.cc b/csrcs/fastdeploy/core/fd_type.cc deleted file mode 100644 index ae70fa6e5..000000000 --- a/csrcs/fastdeploy/core/fd_type.cc +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/core/fd_type.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -int FDDataTypeSize(const FDDataType& data_type) { - FDASSERT(data_type != FDDataType::FP16, "Float16 is not supported."); - if (data_type == FDDataType::BOOL) { - return sizeof(bool); - } else if (data_type == FDDataType::INT16) { - return sizeof(int16_t); - } else if (data_type == FDDataType::INT32) { - return sizeof(int32_t); - } else if (data_type == FDDataType::INT64) { - return sizeof(int64_t); - } else if (data_type == FDDataType::FP32) { - return sizeof(float); - } else if (data_type == FDDataType::FP64) { - return sizeof(double); - } else if (data_type == FDDataType::UINT8) { - return sizeof(uint8_t); - } else { - FDASSERT(false, "Unexpected data type: " + Str(data_type)); - } - return -1; -} - -std::string Str(const Device& d) { - std::string out; - switch (d) { - case Device::DEFAULT: - out = "Device::DEFAULT"; - break; - case Device::CPU: - out = "Device::CPU"; - break; - case Device::GPU: - out = "Device::GPU"; - break; - default: - out = "Device::UNKOWN"; - } - return out; -} - -std::string Str(const FDDataType& fdt) { - std::string out; - switch (fdt) { - case FDDataType::BOOL: - out = "FDDataType::BOOL"; - break; - case FDDataType::INT16: - out = "FDDataType::INT16"; - break; - case FDDataType::INT32: - out = "FDDataType::INT32"; - break; - case FDDataType::INT64: - out = "FDDataType::INT64"; - break; - case FDDataType::FP32: - out = "FDDataType::FP32"; - break; - case FDDataType::FP64: - out = "FDDataType::FP64"; - break; - case FDDataType::FP16: - out = "FDDataType::FP16"; - break; - case FDDataType::UINT8: - out = "FDDataType::UINT8"; - break; - case FDDataType::INT8: - out = "FDDataType::INT8"; - break; - default: - out = "FDDataType::UNKNOWN"; - } - return out; -} - -template -const FDDataType TypeToDataType::dtype = UNKNOWN1; - -template <> -const FDDataType TypeToDataType::dtype = BOOL; - -template <> -const FDDataType TypeToDataType::dtype = INT16; - -template <> -const FDDataType TypeToDataType::dtype = INT32; - -template <> -const FDDataType TypeToDataType::dtype = INT64; - -template <> -const FDDataType TypeToDataType::dtype = FP32; - -template <> -const FDDataType TypeToDataType::dtype = FP64; - -template <> -const FDDataType TypeToDataType::dtype = UINT8; - -template <> -const FDDataType TypeToDataType::dtype = INT8; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/core/fd_type.h b/csrcs/fastdeploy/core/fd_type.h deleted file mode 100644 index 50b00dca8..000000000 --- a/csrcs/fastdeploy/core/fd_type.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include -#include - -#include "fastdeploy/core/config.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -enum FASTDEPLOY_DECL Device { DEFAULT, CPU, GPU }; - -FASTDEPLOY_DECL std::string Str(const Device& d); - -enum FASTDEPLOY_DECL FDDataType { - BOOL, - INT16, - INT32, - INT64, - FP16, - FP32, - FP64, - UNKNOWN1, - UNKNOWN2, - UNKNOWN3, - UNKNOWN4, - UNKNOWN5, - UNKNOWN6, - UNKNOWN7, - UNKNOWN8, - UNKNOWN9, - UNKNOWN10, - UNKNOWN11, - UNKNOWN12, - UNKNOWN13, - UINT8, - INT8 -}; - -FASTDEPLOY_DECL std::string Str(const FDDataType& fdt); - -FASTDEPLOY_DECL int32_t FDDataTypeSize(const FDDataType& data_dtype); - -template -struct FASTDEPLOY_DECL TypeToDataType { - static const FDDataType dtype; -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/fastdeploy_model.cc b/csrcs/fastdeploy/fastdeploy_model.cc deleted file mode 100644 index c4dbc70a7..000000000 --- a/csrcs/fastdeploy/fastdeploy_model.cc +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/utils/unique_ptr.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -bool FastDeployModel::InitRuntime() { - FDASSERT( - CheckModelFormat(runtime_option.model_file, runtime_option.model_format), - "ModelFormatCheck Failed."); - if (runtime_initialized_) { - FDERROR << "The model is already initialized, cannot be initliazed again." - << std::endl; - return false; - } - if (runtime_option.backend != Backend::UNKNOWN) { - if (runtime_option.backend == Backend::ORT) { - if (!IsBackendAvailable(Backend::ORT)) { - FDERROR - << "Backend::ORT is not complied with current FastDeploy library." - << std::endl; - return false; - } - } else if (runtime_option.backend == Backend::TRT) { - if (!IsBackendAvailable(Backend::TRT)) { - FDERROR - << "Backend::TRT is not complied with current FastDeploy library." - << std::endl; - return false; - } - } else if (runtime_option.backend == Backend::PDINFER) { - if (!IsBackendAvailable(Backend::PDINFER)) { - FDERROR << "Backend::PDINFER is not compiled with current FastDeploy " - "library." - << std::endl; - return false; - } - } else { - FDERROR - << "Only support Backend::ORT / Backend::TRT / Backend::PDINFER now." - << std::endl; - return false; - } - runtime_ = utils::make_unique(); - if (!runtime_->Init(runtime_option)) { - return false; - } - runtime_initialized_ = true; - return true; - } - - if (runtime_option.device == Device::CPU) { - return CreateCpuBackend(); - } else if (runtime_option.device == Device::GPU) { -#ifdef WITH_GPU - return CreateGpuBackend(); -#else - FDERROR << "The compiled FastDeploy library doesn't support GPU now." - << std::endl; - return false; -#endif - } - FDERROR << "Only support CPU/GPU now." << std::endl; - return false; -} - -bool FastDeployModel::CreateCpuBackend() { - if (valid_cpu_backends.size() == 0) { - FDERROR << "There's no valid cpu backends for model: " << ModelName() - << std::endl; - return false; - } - - for (size_t i = 0; i < valid_cpu_backends.size(); ++i) { - if (!IsBackendAvailable(valid_cpu_backends[i])) { - continue; - } - runtime_option.backend = valid_cpu_backends[i]; - runtime_ = std::unique_ptr(new Runtime()); - if (!runtime_->Init(runtime_option)) { - return false; - } - runtime_initialized_ = true; - return true; - } - FDERROR << "Found no valid backend for model: " << ModelName() << std::endl; - return false; -} - -bool FastDeployModel::CreateGpuBackend() { - if (valid_gpu_backends.size() == 0) { - FDERROR << "There's no valid gpu backends for model: " << ModelName() - << std::endl; - return false; - } - - for (size_t i = 0; i < valid_gpu_backends.size(); ++i) { - if (!IsBackendAvailable(valid_gpu_backends[i])) { - continue; - } - runtime_option.backend = valid_gpu_backends[i]; - runtime_ = std::unique_ptr(new Runtime()); - if (!runtime_->Init(runtime_option)) { - return false; - } - runtime_initialized_ = true; - return true; - } - FDERROR << "Cannot find an available gpu backend to load this model." - << std::endl; - return false; -} - -bool FastDeployModel::Infer(std::vector& input_tensors, - std::vector* output_tensors) { - return runtime_->Infer(input_tensors, output_tensors); -} - -void FastDeployModel::EnableDebug() { -#ifdef FASTDEPLOY_DEBUG - debug_ = true; -#else - FDWARNING << "The compile FastDeploy is not with -DENABLE_DEBUG=ON, so " - "cannot enable debug mode." - << std::endl; - debug_ = false; -#endif -} - -bool FastDeployModel::DebugEnabled() { return debug_; } - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/fastdeploy_model.h b/csrcs/fastdeploy/fastdeploy_model.h deleted file mode 100644 index df83ac525..000000000 --- a/csrcs/fastdeploy/fastdeploy_model.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -#include "fastdeploy/fastdeploy_runtime.h" - -namespace fastdeploy { - -class FASTDEPLOY_DECL FastDeployModel { - public: - virtual std::string ModelName() const { return "NameUndefined"; } - - virtual bool InitRuntime(); - virtual bool CreateCpuBackend(); - virtual bool CreateGpuBackend(); - virtual bool Infer(std::vector& input_tensors, - std::vector* output_tensors); - - RuntimeOption runtime_option; - std::vector valid_cpu_backends = {Backend::ORT}; - std::vector valid_gpu_backends = {Backend::ORT}; - std::vector valid_external_backends; - bool initialized = false; - virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); } - virtual int NumOutputsOfRuntime() { return runtime_->NumOutputs(); } - virtual TensorInfo InputInfoOfRuntime(int index) { - return runtime_->GetInputInfo(index); - } - virtual TensorInfo OutputInfoOfRuntime(int index) { - return runtime_->GetOutputInfo(index); - } - virtual bool Initialized() const { - return runtime_initialized_ && initialized; - } - - virtual void EnableDebug(); - virtual bool DebugEnabled(); - - private: - std::unique_ptr runtime_; - bool runtime_initialized_ = false; - bool debug_ = false; -}; - -#define TIMERECORD_START(id) \ - TimeCounter tc_##id; \ - tc_##id.Start(); - -#define TIMERECORD_END(id, prefix) \ - if (DebugEnabled()) { \ - tc_##id.End(); \ - FDLogger() << __FILE__ << "(" << __LINE__ << "):" << __FUNCTION__ << " " \ - << prefix << " duration = " << tc_##id.Duration() << "s." \ - << std::endl; \ - } - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/fastdeploy_runtime.cc b/csrcs/fastdeploy/fastdeploy_runtime.cc deleted file mode 100644 index e5c41a29a..000000000 --- a/csrcs/fastdeploy/fastdeploy_runtime.cc +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/fastdeploy_runtime.h" -#include "fastdeploy/utils/unique_ptr.h" -#include "fastdeploy/utils/utils.h" - -#ifdef ENABLE_ORT_BACKEND -#include "fastdeploy/backends/ort/ort_backend.h" -#endif - -#ifdef ENABLE_TRT_BACKEND -#include "fastdeploy/backends/tensorrt/trt_backend.h" -#endif - -#ifdef ENABLE_PADDLE_BACKEND -#include "fastdeploy/backends/paddle/paddle_backend.h" -#endif - -namespace fastdeploy { - -std::vector GetAvailableBackends() { - std::vector backends; -#ifdef ENABLE_ORT_BACKEND - backends.push_back(Backend::ORT); -#endif -#ifdef ENABLE_TRT_BACKEND - backends.push_back(Backend::TRT); -#endif -#ifdef ENABLE_PADDLE_BACKEND - backends.push_back(Backend::PDINFER); -#endif - return backends; -} - -bool IsBackendAvailable(const Backend& backend) { - std::vector backends = GetAvailableBackends(); - for (size_t i = 0; i < backends.size(); ++i) { - if (backend == backends[i]) { - return true; - } - } - return false; -} - -std::string Str(const Backend& b) { - if (b == Backend::ORT) { - return "Backend::ORT"; - } else if (b == Backend::TRT) { - return "Backend::TRT"; - } else if (b == Backend::PDINFER) { - return "Backend::PDINFER"; - } - return "UNKNOWN-Backend"; -} - -std::string Str(const Frontend& f) { - if (f == Frontend::PADDLE) { - return "Frontend::PADDLE"; - } else if (f == Frontend::ONNX) { - return "Frontend::ONNX"; - } - return "UNKNOWN-Frontend"; -} - -bool CheckModelFormat(const std::string& model_file, - const Frontend& model_format) { - if (model_format == Frontend::PADDLE) { - if (model_file.size() < 8 || - model_file.substr(model_file.size() - 8, 8) != ".pdmodel") { - FDERROR << "With model format of Frontend::PADDLE, the model file " - "should ends with `.pdmodel`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == Frontend::ONNX) { - if (model_file.size() < 5 || - model_file.substr(model_file.size() - 5, 5) != ".onnx") { - FDERROR << "With model format of Frontend::ONNX, the model file " - "should ends with `.onnx`, but now it's " - << model_file << std::endl; - return false; - } - } else { - FDERROR << "Only support model format with frontend Frontend::PADDLE / " - "Frontend::ONNX." - << std::endl; - return false; - } - return true; -} - -Frontend GuessModelFormat(const std::string& model_file) { - if (model_file.size() > 8 && - model_file.substr(model_file.size() - 8, 8) == ".pdmodel") { - FDLogger() << "Model Format: PaddlePaddle." << std::endl; - return Frontend::PADDLE; - } else if (model_file.size() > 5 && - model_file.substr(model_file.size() - 5, 5) == ".onnx") { - FDLogger() << "Model Format: ONNX." << std::endl; - return Frontend::ONNX; - } - - FDERROR << "Cannot guess which model format you are using, please set " - "RuntimeOption::model_format manually." - << std::endl; - return Frontend::PADDLE; -} - -void RuntimeOption::SetModelPath(const std::string& model_path, - const std::string& params_path, - const std::string& _model_format) { - if (_model_format == "paddle") { - model_file = model_path; - params_file = params_path; - model_format = Frontend::PADDLE; - } else if (_model_format == "onnx") { - model_file = model_path; - model_format = Frontend::ONNX; - } else { - FDASSERT(false, "The model format only can be 'paddle' or 'onnx'."); - } -} - -void RuntimeOption::UseGpu(int gpu_id) { -#ifdef WITH_GPU - device = Device::GPU; - device_id = gpu_id; -#else - FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU." - << std::endl; - device = Device::CPU; -#endif -} - -void RuntimeOption::UseCpu() { device = Device::CPU; } - -void RuntimeOption::SetCpuThreadNum(int thread_num) { - FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); - cpu_thread_num = thread_num; -} - -// use paddle inference backend -void RuntimeOption::UsePaddleBackend() { -#ifdef ENABLE_PADDLE_BACKEND - backend = Backend::PDINFER; -#else - FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference."); -#endif -} - -// use onnxruntime backend -void RuntimeOption::UseOrtBackend() { -#ifdef ENABLE_ORT_BACKEND - backend = Backend::ORT; -#else - FDASSERT(false, "The FastDeploy didn't compile with OrtBackend."); -#endif -} - -void RuntimeOption::UseTrtBackend() { -#ifdef ENABLE_TRT_BACKEND - backend = Backend::TRT; -#else - FDASSERT(false, "The FastDeploy didn't compile with TrtBackend."); -#endif -} - -void RuntimeOption::EnablePaddleMKLDNN() { pd_enable_mkldnn = true; } - -void RuntimeOption::DisablePaddleMKLDNN() { pd_enable_mkldnn = false; } - -void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) { - FDASSERT(size > 0, "Parameter size must greater than 0."); - pd_mkldnn_cache_size = size; -} - -void RuntimeOption::SetTrtInputShape(const std::string& input_name, - const std::vector& min_shape, - const std::vector& opt_shape, - const std::vector& max_shape) { - trt_min_shape[input_name].clear(); - trt_max_shape[input_name].clear(); - trt_opt_shape[input_name].clear(); - trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end()); - if (opt_shape.size() == 0) { - trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end()); - } else { - trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end()); - } - if (max_shape.size() == 0) { - trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end()); - } else { - trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end()); - } -} - -void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; } - -void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; } - -void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) { - trt_serialize_file = cache_file_path; -} - -bool Runtime::Init(const RuntimeOption& _option) { - option = _option; - if (option.model_format == Frontend::AUTOREC) { - option.model_format = GuessModelFormat(_option.model_file); - } - if (option.backend == Backend::UNKNOWN) { - if (IsBackendAvailable(Backend::ORT)) { - option.backend = Backend::ORT; - } else if (IsBackendAvailable(Backend::PDINFER)) { - option.backend = Backend::PDINFER; - } else { - FDERROR << "Please define backend in RuntimeOption, current it's " - "Backend::UNKNOWN." - << std::endl; - return false; - } - } - if (option.backend == Backend::ORT) { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::TRT only supports Device::CPU/Device::GPU."); - CreateOrtBackend(); - } else if (option.backend == Backend::TRT) { - FDASSERT(option.device == Device::GPU, - "Backend::TRT only supports Device::GPU."); - CreateTrtBackend(); - } else if (option.backend == Backend::PDINFER) { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::TRT only supports Device::CPU/Device::GPU."); - FDASSERT( - option.model_format == Frontend::PADDLE, - "Backend::PDINFER only supports model format of Frontend::PADDLE."); - CreatePaddleBackend(); - } else { - FDERROR << "Runtime only support " - "Backend::ORT/Backend::TRT/Backend::PDINFER as backend now." - << std::endl; - return false; - } - return true; -} - -TensorInfo Runtime::GetInputInfo(int index) { - return backend_->GetInputInfo(index); -} - -TensorInfo Runtime::GetOutputInfo(int index) { - return backend_->GetOutputInfo(index); -} - -bool Runtime::Infer(std::vector& input_tensors, - std::vector* output_tensors) { - return backend_->Infer(input_tensors, output_tensors); -} - -void Runtime::CreatePaddleBackend() { -#ifdef ENABLE_PADDLE_BACKEND - auto pd_option = PaddleBackendOption(); - pd_option.enable_mkldnn = option.pd_enable_mkldnn; - pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size; - pd_option.use_gpu = (option.device == Device::GPU) ? true : false; - pd_option.gpu_id = option.device_id; - pd_option.cpu_thread_num = option.cpu_thread_num; - FDASSERT(option.model_format == Frontend::PADDLE, - "PaddleBackend only support model format of Frontend::PADDLE."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, - pd_option), - "Load model from Paddle failed while initliazing PaddleBackend."); -#else - FDASSERT(false, - "PaddleBackend is not available, please compiled with " - "ENABLE_PADDLE_BACKEND=ON."); -#endif -} - -void Runtime::CreateOrtBackend() { -#ifdef ENABLE_ORT_BACKEND - auto ort_option = OrtBackendOption(); - ort_option.graph_optimization_level = option.ort_graph_opt_level; - ort_option.intra_op_num_threads = option.cpu_thread_num; - ort_option.inter_op_num_threads = option.ort_inter_op_num_threads; - ort_option.execution_mode = option.ort_execution_mode; - ort_option.use_gpu = (option.device == Device::GPU) ? true : false; - ort_option.gpu_id = option.device_id; - - // TODO(jiangjiajun): inside usage, maybe remove this later - ort_option.remove_multiclass_nms_ = option.remove_multiclass_nms_; - ort_option.custom_op_info_ = option.custom_op_info_; - - FDASSERT(option.model_format == Frontend::PADDLE || - option.model_format == Frontend::ONNX, - "OrtBackend only support model format of Frontend::PADDLE / " - "Frontend::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (option.model_format == Frontend::ONNX) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option), - "Load model from ONNX failed while initliazing OrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, ort_option), - "Load model from Paddle failed while initliazing OrtBackend."); - } -#else - FDASSERT(false, - "OrtBackend is not available, please compiled with " - "ENABLE_ORT_BACKEND=ON."); -#endif -} - -void Runtime::CreateTrtBackend() { -#ifdef ENABLE_TRT_BACKEND - auto trt_option = TrtBackendOption(); - trt_option.gpu_id = option.device_id; - trt_option.enable_fp16 = option.trt_enable_fp16; - trt_option.enable_int8 = option.trt_enable_int8; - trt_option.max_batch_size = option.trt_max_batch_size; - trt_option.max_workspace_size = option.trt_max_workspace_size; - trt_option.max_shape = option.trt_max_shape; - trt_option.min_shape = option.trt_min_shape; - trt_option.opt_shape = option.trt_opt_shape; - trt_option.serialize_file = option.trt_serialize_file; - - // TODO(jiangjiajun): inside usage, maybe remove this later - trt_option.remove_multiclass_nms_ = option.remove_multiclass_nms_; - trt_option.custom_op_info_ = option.custom_op_info_; - - FDASSERT(option.model_format == Frontend::PADDLE || - option.model_format == Frontend::ONNX, - "TrtBackend only support model format of Frontend::PADDLE / " - "Frontend::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (option.model_format == Frontend::ONNX) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option), - "Load model from ONNX failed while initliazing TrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, trt_option), - "Load model from Paddle failed while initliazing TrtBackend."); - } -#else - FDASSERT(false, - "TrtBackend is not available, please compiled with " - "ENABLE_TRT_BACKEND=ON."); -#endif -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/fastdeploy_runtime.h b/csrcs/fastdeploy/fastdeploy_runtime.h deleted file mode 100644 index 780945458..000000000 --- a/csrcs/fastdeploy/fastdeploy_runtime.h +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include - -#include "fastdeploy/backends/backend.h" -#include "fastdeploy/utils/perf.h" - -namespace fastdeploy { - -enum FASTDEPLOY_DECL Backend { UNKNOWN, ORT, TRT, PDINFER }; -// AUTOREC will according to the name of model file -// to decide which Frontend is -enum FASTDEPLOY_DECL Frontend { AUTOREC, PADDLE, ONNX }; - -FASTDEPLOY_DECL std::string Str(const Backend& b); -FASTDEPLOY_DECL std::string Str(const Frontend& f); -FASTDEPLOY_DECL std::vector GetAvailableBackends(); - -FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend); - -bool CheckModelFormat(const std::string& model_file, - const Frontend& model_format); -Frontend GuessModelFormat(const std::string& model_file); - -struct FASTDEPLOY_DECL RuntimeOption { - // set path of model file and params file - // for onnx, only need to define model_file, but also need to - // define model_format - // model_format support 'paddle' / 'onnx' now. - void SetModelPath(const std::string& model_path, - const std::string& params_path = "", - const std::string& _model_format = "paddle"); - - // set model inference in GPU - void UseCpu(); - - // set model inference in CPU - void UseGpu(int gpu_id = 0); - - // set number of thread while inference in CPU - void SetCpuThreadNum(int thread_num); - - // use paddle inference backend - void UsePaddleBackend(); - - // use onnxruntime backend - void UseOrtBackend(); - - // use tensorrt backend - void UseTrtBackend(); - - // enable mkldnn while use paddle inference in CPU - void EnablePaddleMKLDNN(); - // disable mkldnn while use paddle inference in CPU - void DisablePaddleMKLDNN(); - - // set size of cached shape while enable mkldnn with paddle inference backend - void SetPaddleMKLDNNCacheSize(int size); - - // set tensorrt shape while the inputs of model contain dynamic shape - // min_shape: the minimum shape - // opt_shape: the most common shape while inference, default be empty - // max_shape: the maximum shape, default be empty - - // if opt_shape, max_shape are empty, they will keep same with the min_shape - // which means the shape will be fixed as min_shape while inference - void SetTrtInputShape( - const std::string& input_name, const std::vector& min_shape, - const std::vector& opt_shape = std::vector(), - const std::vector& max_shape = std::vector()); - - // enable half precision while use tensorrt backend - void EnableTrtFP16(); - // disable half precision, change to full precision(float32) - void DisableTrtFP16(); - - void SetTrtCacheFile(const std::string& cache_file_path); - - Backend backend = Backend::UNKNOWN; - // for cpu inference and preprocess - int cpu_thread_num = 8; - int device_id = 0; - - Device device = Device::CPU; - - // ======Only for ORT Backend======== - // -1 means use default value by ort - // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3: - // ORT_ENABLE_ALL - int ort_graph_opt_level = -1; - int ort_inter_op_num_threads = -1; - // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL - int ort_execution_mode = -1; - - // ======Only for Paddle Backend===== - bool pd_enable_mkldnn = true; - int pd_mkldnn_cache_size = 1; - - // ======Only for Trt Backend======= - std::map> trt_max_shape; - std::map> trt_min_shape; - std::map> trt_opt_shape; - std::string trt_serialize_file = ""; - bool trt_enable_fp16 = false; - bool trt_enable_int8 = false; - size_t trt_max_batch_size = 32; - size_t trt_max_workspace_size = 1 << 30; - - std::string model_file = ""; // Path of model file - std::string params_file = ""; // Path of parameters file, can be empty - Frontend model_format = Frontend::AUTOREC; // format of input model - - // inside parameters, only for inside usage - // remove multiclass_nms in Paddle2ONNX - bool remove_multiclass_nms_ = false; - // for Paddle2ONNX to export custom operators - std::map custom_op_info_; -}; - -struct FASTDEPLOY_DECL Runtime { - public: - // explicit Runtime(const RuntimeOption& _option = RuntimeOption()); - - bool Init(const RuntimeOption& _option); - - bool Infer(std::vector& input_tensors, - std::vector* output_tensors); - - void CreateOrtBackend(); - - void CreatePaddleBackend(); - - void CreateTrtBackend(); - - int NumInputs() { return backend_->NumInputs(); } - int NumOutputs() { return backend_->NumOutputs(); } - TensorInfo GetInputInfo(int index); - TensorInfo GetOutputInfo(int index); - - RuntimeOption option; - - private: - std::unique_ptr backend_; -}; -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/function/eigen.cc b/csrcs/fastdeploy/function/eigen.cc deleted file mode 100644 index adcfbb195..000000000 --- a/csrcs/fastdeploy/function/eigen.cc +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/function/eigen.h" - -namespace fastdeploy { - -std::shared_ptr EigenDeviceWrapper::instance_ = nullptr; - -std::shared_ptr EigenDeviceWrapper::GetInstance() { - if (instance_ == nullptr) { - instance_ = std::make_shared(); - } - return instance_; -} - -const Eigen::DefaultDevice* EigenDeviceWrapper::GetDevice() const { - return &device_; -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/function/eigen.h b/csrcs/fastdeploy/function/eigen.h deleted file mode 100644 index 32bacf064..000000000 --- a/csrcs/fastdeploy/function/eigen.h +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include "fastdeploy/core/fd_tensor.h" -#include "unsupported/Eigen/CXX11/Tensor" - -namespace fastdeploy { -// EigenDim converts shape into Eigen::DSizes. -template -struct EigenDim { - using Type = Eigen::DSizes; - - static Type From(const std::vector& dims) { - Type ret; - for (int64_t d = 0; d < dims.size(); d++) { - ret[d] = dims[d]; - } - return ret; - } -}; - -// Interpret FDTensor as EigenTensor and EigenConstTensor. -template -struct EigenTensor { - using Type = Eigen::TensorMap>; - - using ConstType = - Eigen::TensorMap>; - - static Type From(FDTensor& tensor, - const std::vector& dims) { // NOLINT - return Type(reinterpret_cast(tensor.Data()), EigenDim::From(dims)); - } - - static Type From(FDTensor& tensor) { // NOLINT - return From(tensor, tensor.shape); - } // NOLINT - - static ConstType From(const FDTensor& tensor, - const std::vector& dims) { - return ConstType(reinterpret_cast(tensor.Data()), - EigenDim::From(dims)); - } - - static ConstType From(const FDTensor& tensor) { - return From(tensor, tensor.shape); - } -}; - -template -struct EigenScalar { - // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. - using Type = Eigen::TensorMap< - Eigen::TensorFixedSize, MajorType, IndexType>>; - using ConstType = Eigen::TensorMap< - Eigen::TensorFixedSize, MajorType, IndexType>>; - - static Type From(FDTensor& tensor) { - return Type(reinterpret_cast(tensor.Data())); - } // NOLINT - - static ConstType From(const FDTensor& tensor) { - return ConstType(reinterpret_cast(tensor.Data())); - } -}; - -template -struct EigenVector : public EigenTensor { - // Flatten reshapes a Tensor into an EigenVector. - static typename EigenVector::Type Flatten(FDTensor& tensor) { // NOLINT - return EigenVector::From(tensor, {tensor.Numel()}); - } - - static typename EigenVector::ConstType Flatten( - const FDTensor& tensor) { // NOLINT - return EigenVector::From(tensor, {tensor.Numel()}); - } -}; - -class EigenDeviceWrapper { - public: - static std::shared_ptr GetInstance(); - const Eigen::DefaultDevice* GetDevice() const; - - private: - Eigen::DefaultDevice device_; - static std::shared_ptr instance_; -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/function/reduce.cc b/csrcs/fastdeploy/function/reduce.cc deleted file mode 100644 index 897504e05..000000000 --- a/csrcs/fastdeploy/function/reduce.cc +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "fastdeploy/function/eigen.h" -#include "fastdeploy/function/reduce.h" -#include "fastdeploy/function/reduce_functor.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -#ifdef ENABLE_FDTENSOR_FUNC - -template -void ReduceFunctor(const FDTensor& input, FDTensor* output, - const std::vector& dims, bool keep_dim) { - auto x = EigenTensor::From(input); - auto x_rank = static_cast(x.dimensions().size()); - auto reduce_dim = Eigen::array(); - std::vector dims_ref = dims; - - auto out_dims = input.shape; - for (size_t i = 0; i < dims_ref.size(); ++i) { - if (dims_ref[i] < 0) dims_ref[i] = x_rank + dims_ref[i]; - reduce_dim[i] = dims_ref[i]; - out_dims[dims_ref[i]] = 1; - } - auto origin_output_dims = out_dims; - output->Allocate(origin_output_dims, TypeToDataType::dtype); - // construct the squeezed output tensor - if (x_rank > 1) { - const int kDelFlag = -2; - for (size_t i = 0; i < dims_ref.size(); ++i) { - out_dims[dims_ref[i]] = kDelFlag; - } - out_dims.erase(remove(out_dims.begin(), out_dims.end(), kDelFlag), - out_dims.end()); - } - - auto& place = *EigenDeviceWrapper::GetInstance()->GetDevice(); - Functor functor; - if (D == 1) { - auto out = EigenScalar::From(*output); - functor(place, &x, &out, reduce_dim); - } else { - auto out = EigenTensor::From(*output, out_dims); - functor(place, &x, &out, reduce_dim); - if (!keep_dim) { - output->shape = std::move(out_dims); - } - } -} - -#define HANDLE_REDUCE_DIM(NDIM, RDIM) \ - if (ndim == NDIM && rdim == RDIM) { \ - ReduceFunctor(input, output, dims, keep_dim); \ - } - -inline void GetShuffledDim(const std::vector& src_dims, - std::vector* dst_dims, - const std::vector& reduced_dims, - std::vector* perm_axis) { - // check if it's a reduced dim - std::vector src_dims_check(src_dims.size(), false); - size_t src_size = src_dims.size(); - size_t reduce_size = reduced_dims.size(); - std::vector regular_reduced_dims = reduced_dims; - for (size_t i = 0; i < regular_reduced_dims.size(); i++) { - if (regular_reduced_dims[i] < 0) { - regular_reduced_dims[i] = src_size + regular_reduced_dims[i]; - } - } - - for (size_t i = 0; i < reduce_size; ++i) { - dst_dims->at(src_size - reduce_size + i) = - src_dims[regular_reduced_dims[i]]; - (*perm_axis)[src_size - reduce_size + i] = regular_reduced_dims[i]; - src_dims_check[regular_reduced_dims[i]] = true; - } - - size_t offset = 0; - for (size_t i = 0; i < src_dims_check.size(); ++i) { - bool is_reduced = src_dims_check[i]; - if (!is_reduced) { - (*perm_axis)[offset] = i; - dst_dims->at(offset++) = src_dims[i]; - } - } -} - -template -void GetShuffledInput(const FDTensor& input, FDTensor* shuffled_input, - const std::vector& dims) { - auto shuffled_dims = input.shape; - std::vector perm_axis(input.shape.size()); - GetShuffledDim(input.shape, &shuffled_dims, dims, &perm_axis); - - shuffled_input->Allocate(shuffled_dims, input.dtype); - // TODO(zhoushunjie) : Need to implement trans function - // phi::funcs::TransposeNormal trans; - // trans(dev_ctx, input, shuffled_input, perm_axis); -} - -//////////////// HandleLargeDim -template -void HandleLargeDim(const FDTensor& input, FDTensor* output, - const std::vector& dims, bool keep_dim) { - // shuffle the reduced dim to the end - FDTensor shuffled_input; - GetShuffledInput(input, &shuffled_input, dims); - - // transpose to 2D tensor whose shape is {unreduced, reduced}. - const int64_t unreduced = output->Numel(); - const int64_t reduced = shuffled_input.Numel() / unreduced; - shuffled_input.Allocate({unreduced, reduced}, TypeToDataType::dtype); - - auto output_dim = output->shape; - output->Allocate({unreduced}, TypeToDataType::dtype); - - ReduceFunctor(shuffled_input, output, {1}, keep_dim); - output->shape = output_dim; -} - -////////////// ReduceKernel - -template -void ReduceKernelImpl(const FDTensor& input, FDTensor* output, - const std::vector& dims, bool keep_dim, - bool reduce_all) { - output->Allocate({1}, TypeToDataType::dtype); - const auto& dev = *EigenDeviceWrapper::GetInstance()->GetDevice(); - if (reduce_all) { - // Flatten and reduce 1-D tensor - auto x = EigenVector::Flatten(input); - auto out = EigenScalar::From(*output); - auto reduce_dim = Eigen::array({{0}}); - - Functor functor; - functor(dev, &x, &out, reduce_dim); - } else { - int ndim = input.shape.size(); - int rdim = dims.size(); - if (ndim > 3) { - HandleLargeDim(input, output, dims, keep_dim); - } else { - HANDLE_REDUCE_DIM(4, 3); - HANDLE_REDUCE_DIM(4, 2); - HANDLE_REDUCE_DIM(4, 1); - HANDLE_REDUCE_DIM(3, 2); - HANDLE_REDUCE_DIM(3, 1); - HANDLE_REDUCE_DIM(2, 1); - HANDLE_REDUCE_DIM(1, 1); - } - } -} - -template -void BoolReduceKernel(const FDTensor& input, FDTensor* output, - const std::vector& dims, bool keep_dim, - bool reduce_all) { - // The dims has full dim, set the reduce_all is True - const auto& input_dim_size = input.shape.size(); - std::set dims_set(dims.begin(), dims.end()); - bool full_dim = true; - for (auto i = 0; i < input_dim_size; i++) { - if (dims_set.find(i) == dims_set.end()) { - full_dim = false; - break; - } - } - reduce_all = (reduce_all || full_dim); - - ReduceKernelImpl(input, output, dims, keep_dim, reduce_all); -} - -template -void Reduce(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - // If the dims has full dim, set the reduce_all is True - const int& input_dim_size = x.shape.size(); - std::set dims_set(dims.begin(), dims.end()); - bool full_dim = true; - for (int i = 0; i < input_dim_size; ++i) { - if (dims_set.find(i) == dims_set.end() && - dims_set.find(i - input_dim_size) == dims_set.end()) { - full_dim = false; - break; - } - } - reduce_all = (reduce_all || full_dim); - - FD_VISIT_ALL_TYPES(x.dtype, "ReduceKernelImpl", ([&] { - ReduceKernelImpl(x, out, dims, keep_dim, - reduce_all); - })); -} - -void Max(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - Reduce(x, out, dims, keep_dim, reduce_all); -} - -void Min(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - Reduce(x, out, dims, keep_dim, reduce_all); -} - -void Sum(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - Reduce(x, out, dims, keep_dim, reduce_all); -} - -void All(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - BoolReduceKernel(x, out, dims, keep_dim, reduce_all); -} - -void Any(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - BoolReduceKernel(x, out, dims, keep_dim, reduce_all); -} - -void Mean(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - Reduce(x, out, dims, keep_dim, reduce_all); -} - -void Prod(const FDTensor& x, FDTensor* out, const std::vector& dims, - bool keep_dim, bool reduce_all) { - Reduce(x, out, dims, keep_dim, reduce_all); -} -#endif - -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/function/reduce.h b/csrcs/fastdeploy/function/reduce.h deleted file mode 100644 index af8810c6b..000000000 --- a/csrcs/fastdeploy/function/reduce.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/core/fd_tensor.h" - -namespace fastdeploy { - -#ifdef ENABLE_FDTENSOR_FUNC -/** Excute the maximum operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void Max(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -/** Excute the minimum operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void Min(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -/** Excute the sum operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void Sum(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -/** Excute the all operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void All(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -/** Excute the any operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void Any(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -/** Excute the mean operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void Mean(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -/** Excute the product operation for input FDTensor along given dims. - @param x The input tensor. - @param out The output tensor which stores the result. - @param dims The vector of axis which will be reduced. - @param keep_dim Whether to keep the reduced dims, default false. - @param reduce_all Whether to reduce all dims, default false. -*/ -FASTDEPLOY_DECL void Prod(const FDTensor& x, FDTensor* out, - const std::vector& dims, - bool keep_dim = false, bool reduce_all = false); - -#endif -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/function/reduce_functor.h b/csrcs/fastdeploy/function/reduce_functor.h deleted file mode 100644 index de0c45bb3..000000000 --- a/csrcs/fastdeploy/function/reduce_functor.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/function/eigen.h" -namespace fastdeploy { - -//////// Max Functor /////// -struct MaxFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->maximum(dim); - } -}; - -//////// Min Functor /////// -struct MinFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->minimum(dim); - } -}; - -//////// Sum Functor /////// -struct SumFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->sum(dim); - } -}; - -//////// All Functor /////// -struct AllFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->all(dim); - } -}; - -//////// Any Functor /////// -struct AnyFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->any(dim); - } -}; - -//////// Mean Functor /////// -struct MeanFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->mean(dim); - } -}; - -//////// Prod Functor /////// -struct ProdFunctor { - template - void operator()(const Eigen::DefaultDevice& dev, X* x, Y* y, const Dim& dim) { - y->device(dev) = x->prod(dim); - } -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/pybind/fastdeploy_model.cc b/csrcs/fastdeploy/pybind/fastdeploy_model.cc deleted file mode 100644 index b59c0fd0f..000000000 --- a/csrcs/fastdeploy/pybind/fastdeploy_model.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" -#include "fastdeploy/fastdeploy_model.h" - -namespace fastdeploy { - -void BindFDModel(pybind11::module& m) { - pybind11::class_(m, "FastDeployModel") - .def(pybind11::init<>(), "Default Constructor") - .def("model_name", &FastDeployModel::ModelName) - .def("num_inputs_of_runtime", &FastDeployModel::NumInputsOfRuntime) - .def("num_outputs_of_runtime", &FastDeployModel::NumOutputsOfRuntime) - .def("input_info_of_runtime", &FastDeployModel::InputInfoOfRuntime) - .def("output_info_of_runtime", &FastDeployModel::OutputInfoOfRuntime) - .def("initialized", &FastDeployModel::Initialized) - .def_readwrite("runtime_option", &FastDeployModel::runtime_option) - .def_readwrite("valid_cpu_backends", &FastDeployModel::valid_cpu_backends) - .def_readwrite("valid_gpu_backends", - &FastDeployModel::valid_gpu_backends); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/pybind/fastdeploy_runtime.cc b/csrcs/fastdeploy/pybind/fastdeploy_runtime.cc deleted file mode 100644 index 412b1ccef..000000000 --- a/csrcs/fastdeploy/pybind/fastdeploy_runtime.cc +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindRuntime(pybind11::module& m) { - pybind11::class_(m, "RuntimeOption") - .def(pybind11::init()) - .def("set_model_path", &RuntimeOption::SetModelPath) - .def("use_gpu", &RuntimeOption::UseGpu) - .def("use_cpu", &RuntimeOption::UseCpu) - .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) - .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) - .def("use_ort_backend", &RuntimeOption::UseOrtBackend) - .def("use_trt_backend", &RuntimeOption::UseTrtBackend) - .def("enable_paddle_mkldnn", &RuntimeOption::EnablePaddleMKLDNN) - .def("disable_paddle_mkldnn", &RuntimeOption::DisablePaddleMKLDNN) - .def("set_paddle_mkldnn_cache_size", - &RuntimeOption::SetPaddleMKLDNNCacheSize) - .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape) - .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16) - .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16) - .def("set_trt_cache_file", &RuntimeOption::SetTrtCacheFile) - .def_readwrite("model_file", &RuntimeOption::model_file) - .def_readwrite("params_file", &RuntimeOption::params_file) - .def_readwrite("model_format", &RuntimeOption::model_format) - .def_readwrite("backend", &RuntimeOption::backend) - .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num) - .def_readwrite("device_id", &RuntimeOption::device_id) - .def_readwrite("device", &RuntimeOption::device) - .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level) - .def_readwrite("ort_inter_op_num_threads", - &RuntimeOption::ort_inter_op_num_threads) - .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode) - .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape) - .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape) - .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape) - .def_readwrite("trt_serialize_file", &RuntimeOption::trt_serialize_file) - .def_readwrite("trt_enable_fp16", &RuntimeOption::trt_enable_fp16) - .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8) - .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size) - .def_readwrite("trt_max_workspace_size", - &RuntimeOption::trt_max_workspace_size); - - pybind11::class_(m, "TensorInfo") - .def_readwrite("name", &TensorInfo::name) - .def_readwrite("shape", &TensorInfo::shape) - .def_readwrite("dtype", &TensorInfo::dtype); - - pybind11::class_(m, "Runtime") - .def(pybind11::init()) - .def("init", &Runtime::Init) - .def("infer", - [](Runtime& self, std::map& data) { - std::vector inputs(data.size()); - int index = 0; - for (auto iter = data.begin(); iter != data.end(); ++iter) { - inputs[index].dtype = - NumpyDataTypeToFDDataType(iter->second.dtype()); - inputs[index].shape.insert( - inputs[index].shape.begin(), iter->second.shape(), - iter->second.shape() + iter->second.ndim()); - // TODO(jiangjiajun) Maybe skip memory copy is a better choice - // use SetExternalData - inputs[index].data.resize(iter->second.nbytes()); - memcpy(inputs[index].data.data(), iter->second.mutable_data(), - iter->second.nbytes()); - inputs[index].name = iter->first; - index += 1; - } - - std::vector outputs(self.NumOutputs()); - self.Infer(inputs, &outputs); - - std::vector results; - results.reserve(outputs.size()); - for (size_t i = 0; i < outputs.size(); ++i) { - auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); - results.emplace_back( - pybind11::array(numpy_dtype, outputs[i].shape)); - memcpy(results[i].mutable_data(), outputs[i].data.data(), - outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); - } - return results; - }) - .def("num_inputs", &Runtime::NumInputs) - .def("num_outputs", &Runtime::NumOutputs) - .def("get_input_info", &Runtime::GetInputInfo) - .def("get_output_info", &Runtime::GetOutputInfo) - .def_readonly("option", &Runtime::option); - - pybind11::enum_(m, "Backend", pybind11::arithmetic(), - "Backend for inference.") - .value("UNKOWN", Backend::UNKNOWN) - .value("ORT", Backend::ORT) - .value("TRT", Backend::TRT) - .value("PDINFER", Backend::PDINFER); - pybind11::enum_(m, "Frontend", pybind11::arithmetic(), - "Frontend for inference.") - .value("PADDLE", Frontend::PADDLE) - .value("ONNX", Frontend::ONNX); - pybind11::enum_(m, "Device", pybind11::arithmetic(), - "Device for inference.") - .value("CPU", Device::CPU) - .value("GPU", Device::GPU); - - pybind11::enum_(m, "FDDataType", pybind11::arithmetic(), - "Data type of FastDeploy.") - .value("BOOL", FDDataType::BOOL) - .value("INT8", FDDataType::INT8) - .value("INT16", FDDataType::INT16) - .value("INT32", FDDataType::INT32) - .value("INT64", FDDataType::INT64) - .value("FP32", FDDataType::FP32) - .value("FP64", FDDataType::FP64) - .value("UINT8", FDDataType::UINT8); - - m.def("get_available_backends", []() { return GetAvailableBackends(); }); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/pybind/main.cc.in b/csrcs/fastdeploy/pybind/main.cc.in deleted file mode 100644 index 13e0a31c4..000000000 --- a/csrcs/fastdeploy/pybind/main.cc.in +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindRuntime(pybind11::module&); -void BindFDModel(pybind11::module&); -void BindVision(pybind11::module&); - -pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) { - pybind11::dtype dt; - if (fd_dtype == FDDataType::INT32) { - dt = pybind11::dtype::of(); - } else if (fd_dtype == FDDataType::INT64) { - dt = pybind11::dtype::of(); - } else if (fd_dtype == FDDataType::FP32) { - dt = pybind11::dtype::of(); - } else if (fd_dtype == FDDataType::FP64) { - dt = pybind11::dtype::of(); - } else if (fd_dtype == FDDataType::UINT8) { - dt = pybind11::dtype::of(); - } else { - FDASSERT(false, "The function doesn't support data type of " + - Str(fd_dtype) + "."); - } - return dt; -} - -FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype) { - if (np_dtype.is(pybind11::dtype::of())) { - return FDDataType::INT32; - } else if (np_dtype.is(pybind11::dtype::of())) { - return FDDataType::INT64; - } else if (np_dtype.is(pybind11::dtype::of())) { - return FDDataType::FP32; - } else if (np_dtype.is(pybind11::dtype::of())) { - return FDDataType::FP64; - } else if (np_dtype.is(pybind11::dtype::of())) { - return FDDataType::UINT8; - } - FDASSERT(false, - "NumpyDataTypeToFDDataType() only support " - "int32/int64/float32/float64 now."); - return FDDataType::FP32; -} - -void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor, - bool share_buffer) { - tensor->dtype = NumpyDataTypeToFDDataType(pyarray.dtype()); - tensor->shape.insert(tensor->shape.begin(), pyarray.shape(), - pyarray.shape() + pyarray.ndim()); - if (share_buffer) { - tensor->external_data_ptr = pyarray.mutable_data(); - } else { - tensor->data.resize(pyarray.nbytes()); - memcpy(tensor->data.data(), pyarray.mutable_data(), pyarray.nbytes()); - } -} - -pybind11::array TensorToPyArray(const FDTensor& tensor) { - auto numpy_dtype = FDDataTypeToNumpyDataType(tensor.dtype); - auto out = pybind11::array(numpy_dtype, tensor.shape); - memcpy(out.mutable_data(), tensor.Data(), tensor.Numel() * FDDataTypeSize(tensor.dtype)); - return out; -} - -#ifdef ENABLE_VISION -int NumpyDataTypeToOpenCvType(const pybind11::dtype& np_dtype) { - if (np_dtype.is(pybind11::dtype::of())) { - return CV_32S; - } else if (np_dtype.is(pybind11::dtype::of())) { - return CV_8U; - } else if (np_dtype.is(pybind11::dtype::of())) { - return CV_8U; - } else if (np_dtype.is(pybind11::dtype::of())) { - return CV_32F; - } else { - FDASSERT( - false, - "NumpyDataTypeToOpenCvType() only support int32/int8/uint8/float32 " - "now."); - } - return CV_8U; -} - -cv::Mat PyArrayToCvMat(pybind11::array& pyarray) { - auto cv_type = NumpyDataTypeToOpenCvType(pyarray.dtype()); - FDASSERT( - pyarray.ndim() == 3, - "Require rank of array to be 3 with HWC format while converting it to " - "cv::Mat."); - int channel = *(pyarray.shape() + 2); - int height = *(pyarray.shape()); - int width = *(pyarray.shape() + 1); - return cv::Mat(height, width, CV_MAKETYPE(cv_type, channel), - pyarray.mutable_data()); -} -#endif - -PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) { - m.doc() = - "Make programer easier to deploy deeplearning model, save time to save " - "the world!"; - - BindRuntime(m); - BindFDModel(m); -#ifdef ENABLE_VISION - auto vision_module = - m.def_submodule("vision", "Vision module of FastDeploy."); - BindVision(vision_module); -#endif -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/pybind/main.h b/csrcs/fastdeploy/pybind/main.h deleted file mode 100644 index 23f0eccc2..000000000 --- a/csrcs/fastdeploy/pybind/main.h +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include "fastdeploy/fastdeploy_runtime.h" - -#ifdef ENABLE_VISION -#include "fastdeploy/vision.h" -#endif - -namespace fastdeploy { - -void BindBackend(pybind11::module&); -void BindVision(pybind11::module&); - -pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype); - -FDDataType NumpyDataTypeToFDDataType(const pybind11::dtype& np_dtype); - -void PyArrayToTensor(pybind11::array& pyarray, FDTensor* tensor, - bool share_buffer = false); -pybind11::array TensorToPyArray(const FDTensor& tensor); - -#ifdef ENABLE_VISION -cv::Mat PyArrayToCvMat(pybind11::array& pyarray); -#endif - -template -FDDataType CTypeToFDDataType() { - if (std::is_same::value) { - return FDDataType::INT32; - } else if (std::is_same::value) { - return FDDataType::INT64; - } else if (std::is_same::value) { - return FDDataType::FP32; - } else if (std::is_same::value) { - return FDDataType::FP64; - } - FDASSERT(false, - "CTypeToFDDataType only support int32/int64/float32/float64 now."); - return FDDataType::FP32; -} - -template -std::vector PyBackendInfer( - T& self, const std::vector& names, - std::vector& data) { - std::vector inputs(data.size()); - for (size_t i = 0; i < data.size(); ++i) { - // TODO(jiangjiajun) here is considered to use user memory directly - inputs[i].dtype = NumpyDataTypeToFDDataType(data[i].dtype()); - inputs[i].shape.insert(inputs[i].shape.begin(), data[i].shape(), - data[i].shape() + data[i].ndim()); - inputs[i].data.resize(data[i].nbytes()); - memcpy(inputs[i].data.data(), data[i].mutable_data(), data[i].nbytes()); - inputs[i].name = names[i]; - } - - std::vector outputs(self.NumOutputs()); - self.Infer(inputs, &outputs); - - std::vector results; - results.reserve(outputs.size()); - for (size_t i = 0; i < outputs.size(); ++i) { - auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype); - results.emplace_back(pybind11::array(numpy_dtype, outputs[i].shape)); - memcpy(results[i].mutable_data(), outputs[i].data.data(), - outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype)); - } - return results; -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text.h b/csrcs/fastdeploy/text.h deleted file mode 100644 index 184f0f4f9..000000000 --- a/csrcs/fastdeploy/text.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include "fastdeploy/core/config.h" -#ifdef ENABLE_TEXT -#include "fastdeploy/text/text_model.h" -#endif diff --git a/csrcs/fastdeploy/text/common/option.h b/csrcs/fastdeploy/text/common/option.h deleted file mode 100644 index a795fd066..000000000 --- a/csrcs/fastdeploy/text/common/option.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { -namespace text { - -struct FASTDEPLOY_DECL TextPreprocessOption {}; -struct FASTDEPLOY_DECL TextPostprocessOption {}; -struct FASTDEPLOY_DECL PredictionOption {}; - -} // namespace text -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/common/result.cc b/csrcs/fastdeploy/text/common/result.cc deleted file mode 100644 index cb7efbb73..000000000 --- a/csrcs/fastdeploy/text/common/result.cc +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/text/common/result.h" - -namespace fastdeploy { -namespace text {} // namespace text -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/common/result.h b/csrcs/fastdeploy/text/common/result.h deleted file mode 100644 index 4a6f716a3..000000000 --- a/csrcs/fastdeploy/text/common/result.h +++ /dev/null @@ -1,23 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { -namespace text { - -struct FASTDEPLOY_DECL Result {}; - -} // namespace text -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.cc b/csrcs/fastdeploy/text/postprocessor/postprocessor.cc deleted file mode 100644 index e8f717743..000000000 --- a/csrcs/fastdeploy/text/postprocessor/postprocessor.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/text/postprocessor/postprocessor.h" - -namespace fastdeploy { -namespace text { - -bool Postprocessor::Decode(const std::vector& model_result, - Result* decoded_result) const { - return true; -} - -bool Postprocessor::DecodeBatch(const std::vector& model_result, - Result* decoded_result) const { - return true; -} - -} // namespace text -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/postprocessor/postprocessor.h b/csrcs/fastdeploy/text/postprocessor/postprocessor.h deleted file mode 100644 index 76f6a7090..000000000 --- a/csrcs/fastdeploy/text/postprocessor/postprocessor.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include "fastdeploy/core/fd_tensor.h" -#include "fastdeploy/text/common/result.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { -namespace text { - -class Postprocessor { - public: - virtual bool Decode(const std::vector& model_result, - Result* decoded_result) const; - virtual bool DecodeBatch(const std::vector& model_result, - Result* decoded_result) const; -}; - -} // namespace text -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.cc b/csrcs/fastdeploy/text/preprocessor/preprocessor.cc deleted file mode 100644 index 2e2715f61..000000000 --- a/csrcs/fastdeploy/text/preprocessor/preprocessor.cc +++ /dev/null @@ -1,32 +0,0 @@ - -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/text/preprocessor/preprocessor.h" - -namespace fastdeploy { -namespace text { - -bool Preprocessor::Encode(const std::string& raw_text, - std::vector* encoded_tensor) const { - return true; -} - -bool Preprocessor::EncodeBatch(const std::vector& raw_texts, - std::vector* encoded_tensor) const { - return true; -} - -} // namespace text -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/preprocessor/preprocessor.h b/csrcs/fastdeploy/text/preprocessor/preprocessor.h deleted file mode 100644 index 799967093..000000000 --- a/csrcs/fastdeploy/text/preprocessor/preprocessor.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include "fastdeploy/core/fd_tensor.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { -namespace text { - -class Preprocessor { - public: - virtual bool Encode(const std::string& raw_text, - std::vector* encoded_tensor) const; - virtual bool EncodeBatch(const std::vector& raw_texts, - std::vector* encoded_tensor) const; -}; - -} // namespace text -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/text_model.cc b/csrcs/fastdeploy/text/text_model.cc deleted file mode 100644 index d5a40c0e5..000000000 --- a/csrcs/fastdeploy/text/text_model.cc +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/text/text_model.h" -#include "fastdeploy/text/common/option.h" -#include "fastdeploy/text/common/result.h" -#include "fastdeploy/text/postprocessor/postprocessor.h" -#include "fastdeploy/text/preprocessor/preprocessor.h" - -namespace fastdeploy { -namespace text { - -bool TextModel::Predict(const std::string& raw_text, Result* result, - const PredictionOption& option) { - // Preprocess - std::vector input_tensor; - std::vector output_tensor; - if (!preprocessor_->Encode(raw_text, &input_tensor)) { - FDERROR << "Failed to preprocess input data while using model:" - << ModelName() << "." << std::endl; - return false; - } - - // Inference Runtime - if (!Infer(input_tensor, &output_tensor)) { - FDERROR << "Failed to inference while using model:" << ModelName() << "." - << std::endl; - return false; - } - - // Postprocess - if (postprocessor_->Decode(output_tensor, result)) { - FDERROR << "Failed to postprocess while using model:" << ModelName() << "." - << std::endl; - return false; - } - return true; -} - -bool TextModel::PredictBatch(const std::vector& raw_text_array, - Result* results, const PredictionOption& option) { - // Preprocess - std::vector input_tensor; - std::vector output_tensor; - if (!preprocessor_->EncodeBatch(raw_text_array, &input_tensor)) { - FDERROR << "Failed to preprocess input data while using model:" - << ModelName() << "." << std::endl; - return false; - } - - // Inference Runtime - if (!Infer(input_tensor, &output_tensor)) { - FDERROR << "Failed to inference while using model:" << ModelName() << "." - << std::endl; - return false; - } - - // Postprocess - if (postprocessor_->DecodeBatch(output_tensor, results)) { - FDERROR << "Failed to postprocess while using model:" << ModelName() << "." - << std::endl; - return false; - } - return true; -} - -} // namespace text -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/text/text_model.h b/csrcs/fastdeploy/text/text_model.h deleted file mode 100644 index b7fbd5929..000000000 --- a/csrcs/fastdeploy/text/text_model.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include - -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/utils/unique_ptr.h" - -namespace fastdeploy { -namespace text { - -class Preprocessor; -class Postprocessor; -class Result; -class PredictionOption; - -class FASTDEPLOY_DECL TextModel : public FastDeployModel { - public: - virtual std::string ModelName() const { return "TextModel"; } - virtual bool Predict(const std::string& raw_text, Result* result, - const PredictionOption& option); - virtual bool PredictBatch(const std::vector& raw_text_array, - Result* result, const PredictionOption& option); - template - void SetPreprocessor(Args&&... args) { - preprocessor_ = utils::make_unique(std::forward(args)...); - } - template - void SetPostprocessor(Args&&... args) { - postprocessor_ = utils::make_unique(std::forward(args)...); - } - - private: - std::unique_ptr preprocessor_; - std::unique_ptr postprocessor_; -}; - -} // namespace text -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/text/text_pybind.cc b/csrcs/fastdeploy/text/text_pybind.cc deleted file mode 100644 index 564892f16..000000000 --- a/csrcs/fastdeploy/text/text_pybind.cc +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. \ No newline at end of file diff --git a/csrcs/fastdeploy/utils/perf.h b/csrcs/fastdeploy/utils/perf.h deleted file mode 100644 index 9f451c3a9..000000000 --- a/csrcs/fastdeploy/utils/perf.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/utils/utils.h" -#include // NOLINT - -namespace fastdeploy { - -class FASTDEPLOY_DECL TimeCounter { - public: - void Start() { begin_ = std::chrono::system_clock::now(); } - - void End() { end_ = std::chrono::system_clock::now(); } - - double Duration() { - auto duration = - std::chrono::duration_cast(end_ - begin_); - return static_cast(duration.count()) * - std::chrono::microseconds::period::num / - std::chrono::microseconds::period::den; - } - - void PrintInfo(const std::string& prefix = "TimeCounter: ", - bool print_out = true) { - if (!print_out) { - return; - } - FDLogger() << prefix << " duration = " << Duration() << "s." << std::endl; - } - - private: - std::chrono::time_point begin_; - std::chrono::time_point end_; -}; - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/utils/unique_ptr.h b/csrcs/fastdeploy/utils/unique_ptr.h deleted file mode 100644 index 2f24ef70c..000000000 --- a/csrcs/fastdeploy/utils/unique_ptr.h +++ /dev/null @@ -1,58 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -namespace fastdeploy { -namespace utils { -// Trait to select overloads and return types for MakeUnique. -template -struct MakeUniqueResult { - using scalar = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using array = std::unique_ptr; -}; -template -struct MakeUniqueResult { - using invalid = void; -}; - -// MakeUnique(...) is an early implementation of C++14 std::make_unique. -// It is designed to be 100% compatible with std::make_unique so that the -// eventual switchover will be a simple renaming operation. -template -typename MakeUniqueResult::scalar make_unique(Args &&... args) { // NOLINT - return std::unique_ptr( - new T(std::forward(args)...)); // NOLINT(build/c++11) -} - -// Overload for array of unknown bound. -// The allocation of arrays needs to use the array form of new, -// and cannot take element constructor arguments. -template -typename MakeUniqueResult::array make_unique(size_t n) { - return std::unique_ptr(new typename std::remove_extent::type[n]()); -} - -// Reject arrays of known bound. -template -typename MakeUniqueResult::invalid make_unique(Args &&... /* args */) = - delete; // NOLINT - -} // namespace utils -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/utils/utils.cc b/csrcs/fastdeploy/utils/utils.cc deleted file mode 100644 index 3899bcf5e..000000000 --- a/csrcs/fastdeploy/utils/utils.cc +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { - -FDLogger::FDLogger(bool verbose, const std::string& prefix) { - verbose_ = verbose; - line_ = ""; - prefix_ = prefix; -} - -FDLogger& FDLogger::operator<<(std::ostream& (*os)(std::ostream&)) { - if (!verbose_) { - return *this; - } - std::cout << prefix_ << " " << line_ << std::endl; - line_ = ""; - return *this; -} - -bool ReadBinaryFromFile(const std::string& file, std::string* contents) { - std::ifstream fin(file, std::ios::in | std::ios::binary); - if (!fin.is_open()) { - FDERROR << "Failed to open file: " << file << " to read." << std::endl; - return false; - } - fin.seekg(0, std::ios::end); - contents->clear(); - contents->resize(fin.tellg()); - fin.seekg(0, std::ios::beg); - fin.read(&(contents->at(0)), contents->size()); - fin.close(); - return true; -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/utils/utils.h b/csrcs/fastdeploy/utils/utils.h deleted file mode 100644 index 3e309a12a..000000000 --- a/csrcs/fastdeploy/utils/utils.h +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include -#include -#include -#include - -#if defined(_WIN32) -#ifdef FASTDEPLOY_LIB -#define FASTDEPLOY_DECL __declspec(dllexport) -#else -#define FASTDEPLOY_DECL __declspec(dllimport) -#endif // FASTDEPLOY_LIB -#else -#define FASTDEPLOY_DECL __attribute__((visibility("default"))) -#endif // _WIN32 - -namespace fastdeploy { - -class FASTDEPLOY_DECL FDLogger { - public: - FDLogger() { - line_ = ""; - prefix_ = "[FastDeploy]"; - verbose_ = true; - } - explicit FDLogger(bool verbose, const std::string& prefix = "[FastDeploy]"); - - template - FDLogger& operator<<(const T& val) { - if (!verbose_) { - return *this; - } - std::stringstream ss; - ss << val; - line_ += ss.str(); - return *this; - } - FDLogger& operator<<(std::ostream& (*os)(std::ostream&)); - ~FDLogger() { - if (!verbose_ && line_ != "") { - std::cout << line_ << std::endl; - } - } - - private: - std::string line_; - std::string prefix_; - bool verbose_ = true; -}; - -FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file, - std::string* contents); - -#ifndef __REL_FILE__ -#define __REL_FILE__ __FILE__ -#endif - -#define FDERROR \ - FDLogger(true, "[ERROR]") << __REL_FILE__ << "(" << __LINE__ \ - << ")::" << __FUNCTION__ << "\t" - -#define FDWARNING \ - FDLogger(true, "[WARNING]") << __REL_FILE__ << "(" << __LINE__ \ - << ")::" << __FUNCTION__ << "\t" - -#define FDINFO \ - FDLogger(true, "[INFO]") << __REL_FILE__ << "(" << __LINE__ \ - << ")::" << __FUNCTION__ << "\t" - -#define FDASSERT(condition, message) \ - if (!(condition)) { \ - FDERROR << message << std::endl; \ - std::abort(); \ - } - -///////// Basic Marco /////////// - -#define FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, HINT, ...) \ - case enum_type: { \ - using HINT = type; \ - __VA_ARGS__(); \ - break; \ - } - -#define FD_PRIVATE_CASE_TYPE(NAME, enum_type, type, ...) \ - FD_PRIVATE_CASE_TYPE_USING_HINT(NAME, enum_type, type, data_t, __VA_ARGS__) - -#define FD_VISIT_ALL_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::BOOL, bool, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, "Invalid enum data type.") \ - } \ - }() - -#define FD_VISIT_FLOAT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP32, float, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::FP64, double, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, "Invalid enum data type.") \ - } \ - }() - -#define FD_VISIT_INT_TYPES(TYPE, NAME, ...) \ - [&] { \ - const auto& __dtype__ = TYPE; \ - switch (__dtype__) { \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT32, int32_t, \ - __VA_ARGS__) \ - FD_PRIVATE_CASE_TYPE(NAME, ::fastdeploy::FDDataType::INT64, int64_t, \ - __VA_ARGS__) \ - default: \ - FDASSERT(false, "Invalid enum data type.") \ - } \ - }() - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision.h b/csrcs/fastdeploy/vision.h deleted file mode 100644 index 21371b5a1..000000000 --- a/csrcs/fastdeploy/vision.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include "fastdeploy/core/config.h" -#ifdef ENABLE_VISION -#include "fastdeploy/vision/detection/contrib/nanodet_plus.h" -#include "fastdeploy/vision/detection/contrib/scaledyolov4.h" -#include "fastdeploy/vision/detection/contrib/yolor.h" -#include "fastdeploy/vision/detection/contrib/yolov5.h" -#include "fastdeploy/vision/detection/contrib/yolov5lite.h" -#include "fastdeploy/vision/detection/contrib/yolov6.h" -#include "fastdeploy/vision/detection/contrib/yolov7.h" -#include "fastdeploy/vision/detection/contrib/yolox.h" -#include "fastdeploy/vision/facedet/contrib/retinaface.h" -#include "fastdeploy/vision/facedet/contrib/scrfd.h" -#include "fastdeploy/vision/facedet/contrib/ultraface.h" -#include "fastdeploy/vision/facedet/contrib/yolov5face.h" -#include "fastdeploy/vision/faceid/contrib/arcface.h" -#include "fastdeploy/vision/faceid/contrib/cosface.h" -#include "fastdeploy/vision/faceid/contrib/insightface_rec.h" -#include "fastdeploy/vision/faceid/contrib/partial_fc.h" -#include "fastdeploy/vision/faceid/contrib/vpl.h" -#include "fastdeploy/vision/matting/contrib/modnet.h" -#include "fastdeploy/vision/ppcls/model.h" -#include "fastdeploy/vision/detection/ppdet/model.h" -#include "fastdeploy/vision/ppseg/model.h" -#endif - -#include "fastdeploy/vision/visualize/visualize.h" diff --git a/csrcs/fastdeploy/vision/AddModel.md b/csrcs/fastdeploy/vision/AddModel.md deleted file mode 100644 index 30080bd5e..000000000 --- a/csrcs/fastdeploy/vision/AddModel.md +++ /dev/null @@ -1,3 +0,0 @@ -# 如何添加一个模型 - -本文档以[yolov5](https://github.com/ultralytics/yolov5)为例,说明如何添加新的模型支持。 diff --git a/csrcs/fastdeploy/vision/common/processors/base.cc b/csrcs/fastdeploy/vision/common/processors/base.cc deleted file mode 100644 index d770522d8..000000000 --- a/csrcs/fastdeploy/vision/common/processors/base.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/base.h" -#include "fastdeploy/utils/utils.h" - -namespace fastdeploy { -namespace vision { - -ProcLib Processor::default_lib = ProcLib::DEFAULT; - -bool Processor::CpuRun(Mat* mat) { - FDERROR << "Unimplemented CpuRun." << std::endl; - return false; -} - -#ifdef ENABLE_OPENCV_CUDA -bool Processor::GpuRun(Mat* mat) { - FDERROR << "Unimplemented GpuRun." << std::endl; - return false; -} -#endif - -bool Processor::operator()(Mat* mat, ProcLib lib) { - // if default_lib is set - // then use default_lib - ProcLib target = lib; - if (default_lib != ProcLib::DEFAULT) { - target = default_lib; - } - - if (target == ProcLib::OPENCV_CUDA) { -#ifdef ENABLE_OPENCV_CUDA - bool ret = GpuRun(mat); - mat->device = Device::GPU; - return ret; -#else - FDERROR - << "OpenCV is not compiled with CUDA, cannot process image with CUDA." - << std::endl; - return false; -#endif - } - bool ret = CpuRun(mat); - mat->device = Device::CPU; - return ret; -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/base.h b/csrcs/fastdeploy/vision/common/processors/base.h deleted file mode 100644 index d4138864a..000000000 --- a/csrcs/fastdeploy/vision/common/processors/base.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/utils/utils.h" -#include "fastdeploy/vision/common/processors/mat.h" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/imgproc/imgproc.hpp" - -namespace fastdeploy { -namespace vision { - -enum ProcLib { DEFAULT, OPENCV_CPU, OPENCV_CUDA }; - -class Processor { - public: - // default_lib has the highest priority - // all the function in `processor` will force to use - // default_lib if this flag is set. - // DEFAULT means this flag is not set - static ProcLib default_lib; - - // virtual bool ShapeInfer(const std::vector& in_shape, - // std::vector* out_shape) = 0; - virtual std::string Name() = 0; - virtual bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - virtual bool GpuRun(Mat* mat); -#endif - - virtual bool operator()(Mat* mat, - ProcLib lib = ProcLib::OPENCV_CPU); -}; - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/cast.cc b/csrcs/fastdeploy/vision/common/processors/cast.cc deleted file mode 100644 index b9a757f14..000000000 --- a/csrcs/fastdeploy/vision/common/processors/cast.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/cast.h" - -namespace fastdeploy { -namespace vision { - -bool Cast::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - int c = im->channels(); - if (dtype_ == "float") { - if (im->type() != CV_32FC(c)) { - im->convertTo(*im, CV_32FC(c)); - } - } else if (dtype_ == "double") { - if (im->type() != CV_64FC(c)) { - im->convertTo(*im, CV_64FC(c)); - } - } else { - FDWARNING << "Cast not support for " << dtype_ - << " now! will skip this operation." << std::endl; - } - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool Cast::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - int c = im->channels(); - if (dtype_ == "float") { - if (im->type() != CV_32FC(c)) { - im->convertTo(*im, CV_32FC(c)); - } - } else if (dtype_ == "double") { - if (im->type() != CV_64FC(c)) { - im->convertTo(*im, CV_64FC(c)); - } - } else { - FDWARNING << "Cast not support for " << dtype_ - << " now! will skip this operation." << std::endl; - } - return true; -} -#endif - -bool Cast::Run(Mat* mat, const std::string& dtype, ProcLib lib) { - auto c = Cast(dtype); - return c(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/cast.h b/csrcs/fastdeploy/vision/common/processors/cast.h deleted file mode 100644 index 1111f08a6..000000000 --- a/csrcs/fastdeploy/vision/common/processors/cast.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class Cast : public Processor { - public: - explicit Cast(const std::string& dtype = "float") : dtype_(dtype) {} - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "Cast"; } - static bool Run(Mat* mat, const std::string& dtype, - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - std::string dtype_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/center_crop.cc b/csrcs/fastdeploy/vision/common/processors/center_crop.cc deleted file mode 100644 index 27b86ca2d..000000000 --- a/csrcs/fastdeploy/vision/common/processors/center_crop.cc +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/center_crop.h" - -namespace fastdeploy { -namespace vision { - -bool CenterCrop::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - int height = static_cast(im->rows); - int width = static_cast(im->cols); - if (height < height_ || width < width_) { - FDERROR << "[CenterCrop] Image size less than crop size" << std::endl; - return false; - } - int offset_x = static_cast((width - width_) / 2); - int offset_y = static_cast((height - height_) / 2); - cv::Rect crop_roi(offset_x, offset_y, width_, height_); - *im = (*im)(crop_roi); - mat->SetWidth(width_); - mat->SetHeight(height_); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool CenterCrop::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - int height = static_cast(im->rows); - int width = static_cast(im->cols); - if (height < height_ || width < width_) { - FDERROR << "[CenterCrop] Image size less than crop size" << std::endl; - return false; - } - int offset_x = static_cast((width - width_) / 2); - int offset_y = static_cast((height - height_) / 2); - cv::Rect crop_roi(offset_x, offset_y, width_, height_); - *im = (*im)(crop_roi); - mat->SetWidth(width_); - mat->SetHeight(height_); - return true; -} -#endif - -bool CenterCrop::Run(Mat* mat, const int& width, const int& height, - ProcLib lib) { - auto c = CenterCrop(width, height); - return c(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/center_crop.h b/csrcs/fastdeploy/vision/common/processors/center_crop.h deleted file mode 100644 index 86ad0e20d..000000000 --- a/csrcs/fastdeploy/vision/common/processors/center_crop.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class CenterCrop : public Processor { - public: - CenterCrop(int width, int height) : height_(height), width_(width) {} - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "CenterCrop"; } - - static bool Run(Mat* mat, const int& width, const int& height, - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - int height_; - int width_; -}; - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/color_space_convert.cc b/csrcs/fastdeploy/vision/common/processors/color_space_convert.cc deleted file mode 100644 index bcdaf365a..000000000 --- a/csrcs/fastdeploy/vision/common/processors/color_space_convert.cc +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/color_space_convert.h" - -namespace fastdeploy { -namespace vision { -bool BGR2RGB::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - cv::cvtColor(*im, *im, cv::COLOR_BGR2RGB); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool BGR2RGB::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - cv::cuda::cvtColor(*im, *im, cv::COLOR_BGR2RGB); - return true; -} -#endif - -bool RGB2BGR::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool RGB2BGR::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - cv::cuda::cvtColor(*im, *im, cv::COLOR_RGB2BGR); - return true; -} -#endif - -bool BGR2RGB::Run(Mat* mat, ProcLib lib) { - auto b = BGR2RGB(); - return b(mat, lib); -} - -bool RGB2BGR::Run(Mat* mat, ProcLib lib) { - auto r = RGB2BGR(); - return r(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/color_space_convert.h b/csrcs/fastdeploy/vision/common/processors/color_space_convert.h deleted file mode 100644 index 472bcf16d..000000000 --- a/csrcs/fastdeploy/vision/common/processors/color_space_convert.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class BGR2RGB : public Processor { - public: - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - virtual std::string Name() { return "BGR2RGB"; } - - static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU); -}; - -class RGB2BGR : public Processor { - public: - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "RGB2BGR"; } - - static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU); -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/convert.cc b/csrcs/fastdeploy/vision/common/processors/convert.cc deleted file mode 100644 index a7ca6de07..000000000 --- a/csrcs/fastdeploy/vision/common/processors/convert.cc +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/convert.h" - -namespace fastdeploy { - -namespace vision { - -Convert::Convert(const std::vector& alpha, - const std::vector& beta) { - FDASSERT(alpha.size() == beta.size(), - "Convert: requires the size of alpha equal to the size of beta."); - FDASSERT(alpha.size() != 0, - "Convert: requires the size of alpha and beta > 0."); - alpha_.assign(alpha.begin(), alpha.end()); - beta_.assign(beta.begin(), beta.end()); -} - -bool Convert::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - std::vector split_im; - cv::split(*im, split_im); - for (int c = 0; c < im->channels(); c++) { - split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); - } - cv::merge(split_im, *im); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool Convert::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - std::vector split_im; - cv::cuda::split(*im, split_im); - for (int c = 0; c < im->channels(); c++) { - split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); - } - cv::cuda::merge(split_im, *im); - return true; -} -#endif - -bool Convert::Run(Mat* mat, const std::vector& alpha, - const std::vector& beta, ProcLib lib) { - auto c = Convert(alpha, beta); - return c(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/common/processors/convert.h b/csrcs/fastdeploy/vision/common/processors/convert.h deleted file mode 100644 index 5d5a5276f..000000000 --- a/csrcs/fastdeploy/vision/common/processors/convert.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { -class Convert : public Processor { - public: - Convert(const std::vector& alpha, const std::vector& beta); - - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "Convert"; } - - // Compute `result = mat * alpha + beta` directly by channel. - // The default behavior is the same as OpenCV's convertTo method. - static bool Run(Mat* mat, const std::vector& alpha, - const std::vector& beta, - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - std::vector alpha_; - std::vector beta_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/hwc2chw.cc b/csrcs/fastdeploy/vision/common/processors/hwc2chw.cc deleted file mode 100644 index 5bea87e18..000000000 --- a/csrcs/fastdeploy/vision/common/processors/hwc2chw.cc +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/hwc2chw.h" - -namespace fastdeploy { -namespace vision { -bool HWC2CHW::CpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" - << std::endl; - return false; - } - cv::Mat* im = mat->GetCpuMat(); - cv::Mat im_clone = im->clone(); - int rh = im->rows; - int rw = im->cols; - int rc = im->channels(); - - // float* data = reinterpret_cast(im->data); - for (int i = 0; i < rc; ++i) { - // cv::extractChannel(im_clone, cv::Mat(rh, rw, im->type() % 8, data + i - // * rh * rw), - // i); - cv::extractChannel( - im_clone, - cv::Mat(rh, rw, im->type() % 8, - im->ptr() + i * rh * rw * FDDataTypeSize(mat->Type())), - i); - } - mat->layout = Layout::CHW; - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool HWC2CHW::GpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "HWC2CHW: The input data is not Layout::HWC format!" - << std::endl; - return false; - } - cv::cuda::GpuMat* im = mat->GetGpuMat(); - cv::cuda::GpuMat im_clone = im->clone(); - int rh = im->rows; - int rw = im->cols; - int rc = im->channels(); - int num_pixels = rh * rw; - std::vector channels{ - cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[0])), - cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels])), - cv::cuda::GpuMat(rh, rw, im->type() % 8, &(im->ptr()[num_pixels * 2]))}; - cv::cuda::split(im_clone, channels); - mat->layout = Layout::CHW; - return true; -} -#endif - -bool HWC2CHW::Run(Mat* mat, ProcLib lib) { - auto h = HWC2CHW(); - return h(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/hwc2chw.h b/csrcs/fastdeploy/vision/common/processors/hwc2chw.h deleted file mode 100644 index 56fa3ede8..000000000 --- a/csrcs/fastdeploy/vision/common/processors/hwc2chw.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class HWC2CHW : public Processor { - public: - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "HWC2CHW"; } - - static bool Run(Mat* mat, ProcLib lib = ProcLib::OPENCV_CPU); -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/mat.cc b/csrcs/fastdeploy/vision/common/processors/mat.cc deleted file mode 100644 index 2afffa416..000000000 --- a/csrcs/fastdeploy/vision/common/processors/mat.cc +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/vision/common/processors/mat.h" -#include "fastdeploy/utils/utils.h" -namespace fastdeploy { -namespace vision { - -#ifdef ENABLE_OPENCV_CUDA -cv::cuda::GpuMat* Mat::GetGpuMat() { - if (device == Device::CPU) { - gpu_mat.upload(cpu_mat); - } - return &gpu_mat; -} -#endif - -cv::Mat* Mat::GetCpuMat() { -#ifdef ENABLE_OPENCV_CUDA - if (device == Device::GPU) { - gpu_mat.download(cpu_mat); - } -#endif - return &cpu_mat; -} - -void Mat::ShareWithTensor(FDTensor* tensor) { - if (device == Device::GPU) { -#ifdef ENABLE_OPENCV_CUDA - tensor->SetExternalData({Channels(), Height(), Width()}, Type(), - GetGpuMat()->ptr()); - tensor->device = Device::GPU; -#endif - } else { - tensor->SetExternalData({Channels(), Height(), Width()}, Type(), - GetCpuMat()->ptr()); - tensor->device = Device::CPU; - } - if (layout == Layout::HWC) { - tensor->shape = {Height(), Width(), Channels()}; - } -} - -bool Mat::CopyToTensor(FDTensor* tensor) { - cv::Mat* im = GetCpuMat(); - int total_bytes = im->total() * im->elemSize(); - if (total_bytes != tensor->Nbytes()) { - FDERROR << "While copy Mat to Tensor, requires the memory size be same, " - "but now size of Tensor = " - << tensor->Nbytes() << ", size of Mat = " << total_bytes << "." - << std::endl; - return false; - } - memcpy(tensor->MutableData(), im->ptr(), im->total() * im->elemSize()); - return true; -} - -void Mat::PrintInfo(const std::string& flag) { - cv::Mat* im = GetCpuMat(); - cv::Scalar mean = cv::mean(*im); - std::cout << flag << ": " - << "Channel=" << Channels() << ", height=" << Height() - << ", width=" << Width() << ", mean="; - for (int i = 0; i < Channels(); ++i) { - std::cout << mean[i] << " "; - } - std::cout << std::endl; -} - -FDDataType Mat::Type() { - int type = -1; - if (device == Device::GPU) { -#ifdef ENABLE_OPENCV_CUDA - type = gpu_mat.type(); -#endif - } else { - type = cpu_mat.type(); - } - if (type < 0) { - FDASSERT(false, - "While calling Mat::Type(), get negative value, which is not " - "expected!."); - } - type = type % 8; - if (type == 0) { - return FDDataType::UINT8; - } else if (type == 1) { - return FDDataType::INT8; - } else if (type == 2) { - FDASSERT(false, "While calling Mat::Type(), get UINT16 type which is not " - "supported now."); - } else if (type == 3) { - return FDDataType::INT16; - } else if (type == 4) { - return FDDataType::INT32; - } else if (type == 5) { - return FDDataType::FP32; - } else if (type == 6) { - return FDDataType::FP64; - } else { - FDASSERT(false, "While calling Mat::Type(), get type = " + - std::to_string(type) + ", which is not expected!."); - } -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/mat.h b/csrcs/fastdeploy/vision/common/processors/mat.h deleted file mode 100644 index cf4736238..000000000 --- a/csrcs/fastdeploy/vision/common/processors/mat.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -#include "fastdeploy/core/fd_tensor.h" -#include "opencv2/core/core.hpp" - -#ifdef ENABLE_OPENCV_CUDA -#include "opencv2/core/cuda.hpp" -#include "opencv2/cudaarithm.hpp" -#include "opencv2/cudaimgproc.hpp" -#include "opencv2/cudawarping.hpp" -#endif - -namespace fastdeploy { -namespace vision { - -enum Layout { HWC, CHW }; - -struct FASTDEPLOY_DECL Mat { - explicit Mat(cv::Mat& mat) { - cpu_mat = mat; - device = Device::CPU; - layout = Layout::HWC; - height = cpu_mat.rows; - width = cpu_mat.cols; - channels = cpu_mat.channels(); - } - - private: - int channels; - int height; - int width; - cv::Mat cpu_mat; -#ifdef ENABLE_OPENCV_CUDA - cv::cuda::GpuMat gpu_mat; -#endif - - public: -#ifdef ENABLE_OPENCV_CUDA - cv::cuda::GpuMat* GetGpuMat(); -#endif - cv::Mat* GetCpuMat(); - - FDDataType Type(); - int Channels() const { return channels; } - int Width() const { return width; } - int Height() const { return height; } - void SetChannels(int s) { channels = s; } - void SetWidth(int w) { width = w; } - void SetHeight(int h) { height = h; } - - // Transfer the vision::Mat to FDTensor - void ShareWithTensor(FDTensor* tensor); - // Only support copy to cpu tensor now - bool CopyToTensor(FDTensor* tensor); - - // debug functions - // TODO(jiangjiajun) Develop a right process pipeline with c++ is not a easy - // things - // Will add more debug function here to help debug processed image - // This function will print shape / mean of each channels of the Mat - void PrintInfo(const std::string& flag); - - Layout layout = Layout::HWC; - Device device = Device::CPU; -}; - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/normalize.cc b/csrcs/fastdeploy/vision/common/processors/normalize.cc deleted file mode 100644 index b75406070..000000000 --- a/csrcs/fastdeploy/vision/common/processors/normalize.cc +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/normalize.h" - -namespace fastdeploy { -namespace vision { -Normalize::Normalize(const std::vector& mean, - const std::vector& std, bool is_scale, - const std::vector& min, - const std::vector& max) { - FDASSERT(mean.size() == std.size(), - "Normalize: requires the size of mean equal to the size of std."); - std::vector mean_(mean.begin(), mean.end()); - std::vector std_(std.begin(), std.end()); - std::vector min_(mean.size(), 0.0); - std::vector max_(mean.size(), 255.0); - if (min.size() != 0) { - FDASSERT( - min.size() == mean.size(), - "Normalize: while min is defined, requires the size of min equal to " - "the size of mean."); - min_.assign(min.begin(), min.end()); - } - if (max.size() != 0) { - FDASSERT( - min.size() == mean.size(), - "Normalize: while max is defined, requires the size of max equal to " - "the size of mean."); - max_.assign(max.begin(), max.end()); - } - for (auto c = 0; c < mean_.size(); ++c) { - double alpha = 1.0; - if (is_scale) { - alpha /= (max_[c] - min_[c]); - } - double beta = -1.0 * (mean_[c] + min_[c] * alpha) / std_[c]; - alpha /= std_[c]; - alpha_.push_back(alpha); - beta_.push_back(beta); - } -} - -bool Normalize::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - std::vector split_im; - cv::split(*im, split_im); - for (int c = 0; c < im->channels(); c++) { - split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); - } - cv::merge(split_im, *im); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool Normalize::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - std::vector split_im; - cv::cuda::split(*im, split_im); - for (int c = 0; c < im->channels(); c++) { - split_im[c].convertTo(split_im[c], CV_32FC1, alpha_[c], beta_[c]); - } - cv::cuda::merge(split_im, *im); - return true; -} -#endif - -bool Normalize::Run(Mat* mat, const std::vector& mean, - const std::vector& std, bool is_scale, - const std::vector& min, - const std::vector& max, ProcLib lib) { - auto n = Normalize(mean, std, is_scale, min, max); - return n(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/normalize.h b/csrcs/fastdeploy/vision/common/processors/normalize.h deleted file mode 100644 index b8a66e945..000000000 --- a/csrcs/fastdeploy/vision/common/processors/normalize.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { -class Normalize : public Processor { - public: - Normalize(const std::vector& mean, const std::vector& std, - bool is_scale = true, - const std::vector& min = std::vector(), - const std::vector& max = std::vector()); - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "Normalize"; } - - // While use normalize, it is more recommend not use this function - // this function will need to compute result = ((mat / 255) - mean) / std - // if we use the following method - // ``` - // auto norm = Normalize(...) - // norm(mat) - // ``` - // There will be some precomputation in contruct function - // and the `norm(mat)` only need to compute result = mat * alpha + beta - // which will reduce lots of time - static bool Run(Mat* mat, const std::vector& mean, - const std::vector& std, bool is_scale = true, - const std::vector& min = std::vector(), - const std::vector& max = std::vector(), - ProcLib lib = ProcLib::OPENCV_CPU); - private: - std::vector alpha_; - std::vector beta_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/pad.cc b/csrcs/fastdeploy/vision/common/processors/pad.cc deleted file mode 100644 index 3b26d28bc..000000000 --- a/csrcs/fastdeploy/vision/common/processors/pad.cc +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/pad.h" - -namespace fastdeploy { -namespace vision { - -bool Pad::CpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; - return false; - } - if (mat->Channels() > 4) { - FDERROR << "Pad: Only support channels <= 4." << std::endl; - return false; - } - if (mat->Channels() != value_.size()) { - FDERROR << "Pad: Require input channels equals to size of padding value, " - "but now channels = " - << mat->Channels() - << ", the size of padding values = " << value_.size() << "." - << std::endl; - return false; - } - cv::Mat* im = mat->GetCpuMat(); - cv::Scalar value; - if (value_.size() == 1) { - value = cv::Scalar(value_[0]); - } else if (value_.size() == 2) { - value = cv::Scalar(value_[0], value_[1]); - } else if (value_.size() == 3) { - value = cv::Scalar(value_[0], value_[1], value_[2]); - } else { - value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); - } - cv::copyMakeBorder(*im, *im, top_, bottom_, left_, right_, - cv::BORDER_CONSTANT, value); - mat->SetHeight(im->rows); - mat->SetWidth(im->cols); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool Pad::GpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "Pad: The input data must be Layout::HWC format!" << std::endl; - return false; - } - if (mat->Channels() > 4) { - FDERROR << "Pad: Only support channels <= 4." << std::endl; - return false; - } - if (mat->Channels() != value_.size()) { - FDERROR << "Pad: Require input channels equals to size of padding value, " - "but now channels = " - << mat->Channels() - << ", the size of padding values = " << value_.size() << "." - << std::endl; - return false; - } - cv::cuda::GpuMat* im = mat->GetGpuMat(); - cv::Scalar value; - if (value_.size() == 1) { - value = cv::Scalar(value_[0]); - } else if (value_.size() == 2) { - value = cv::Scalar(value_[0], value_[1]); - } else if (value_.size() == 3) { - value = cv::Scalar(value_[0], value_[1], value_[2]); - } else { - value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); - } - cv::cuda::copyMakeBorder(*im, *im, top_, bottom_, left_, right_, - cv::BORDER_CONSTANT, value); - mat->SetHeight(im->rows); - mat->SetWidth(im->cols); - return true; -} -#endif - -bool Pad::Run(Mat* mat, const int& top, const int& bottom, const int& left, - const int& right, const std::vector& value, - ProcLib lib) { - auto p = Pad(top, bottom, left, right, value); - return p(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/pad.h b/csrcs/fastdeploy/vision/common/processors/pad.h deleted file mode 100644 index 110365960..000000000 --- a/csrcs/fastdeploy/vision/common/processors/pad.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class Pad : public Processor { - public: - Pad(int top, int bottom, int left, int right, - const std::vector& value) { - top_ = top; - bottom_ = bottom; - left_ = left; - right_ = right; - value_ = value; - } - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "Pad"; } - - static bool Run(Mat* mat, const int& top, const int& bottom, const int& left, - const int& right, const std::vector& value, - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - int top_; - int bottom_; - int left_; - int right_; - std::vector value_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/pad_to_size.cc b/csrcs/fastdeploy/vision/common/processors/pad_to_size.cc deleted file mode 100644 index d4cbacd87..000000000 --- a/csrcs/fastdeploy/vision/common/processors/pad_to_size.cc +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/pad_to_size.h" - -namespace fastdeploy { -namespace vision { - -bool PadToSize::CpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "PadToSize: The input data must be Layout::HWC format!" - << std::endl; - return false; - } - if (mat->Channels() > 4) { - FDERROR << "PadToSize: Only support channels <= 4." << std::endl; - return false; - } - if (mat->Channels() != value_.size()) { - FDERROR - << "PadToSize: Require input channels equals to size of padding value, " - "but now channels = " - << mat->Channels() << ", the size of padding values = " << value_.size() - << "." << std::endl; - return false; - } - int origin_w = mat->Width(); - int origin_h = mat->Height(); - if (origin_w > width_) { - FDERROR << "PadToSize: the input width:" << origin_w - << " is greater than the target width: " << width_ << "." - << std::endl; - return false; - } - if (origin_h > height_) { - FDERROR << "PadToSize: the input height:" << origin_h - << " is greater than the target height: " << height_ << "." - << std::endl; - return false; - } - if (origin_w == width_ && origin_h == height_) { - return true; - } - - cv::Mat* im = mat->GetCpuMat(); - cv::Scalar value; - if (value_.size() == 1) { - value = cv::Scalar(value_[0]); - } else if (value_.size() == 2) { - value = cv::Scalar(value_[0], value_[1]); - } else if (value_.size() == 3) { - value = cv::Scalar(value_[0], value_[1], value_[2]); - } else { - value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); - } - // top, bottom, left, right - cv::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0, width_ - origin_w, - cv::BORDER_CONSTANT, value); - mat->SetHeight(height_); - mat->SetWidth(width_); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool PadToSize::GpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "PadToSize: The input data must be Layout::HWC format!" - << std::endl; - return false; - } - if (mat->Channels() > 4) { - FDERROR << "PadToSize: Only support channels <= 4." << std::endl; - return false; - } - if (mat->Channels() != value_.size()) { - FDERROR - << "PadToSize: Require input channels equals to size of padding value, " - "but now channels = " - << mat->Channels() << ", the size of padding values = " << value_.size() - << "." << std::endl; - return false; - } - - int origin_w = mat->Width(); - int origin_h = mat->Height(); - if (origin_w > width_) { - FDERROR << "PadToSize: the input width:" << origin_w - << " is greater than the target width: " << width_ << "." - << std::endl; - return false; - } - if (origin_h > height_) { - FDERROR << "PadToSize: the input height:" << origin_h - << " is greater than the target height: " << height_ << "." - << std::endl; - return false; - } - if (origin_w == width_ && origin_h == height_) { - return true; - } - - cv::cuda::GpuMat* im = mat->GetGpuMat(); - cv::Scalar value; - if (value_.size() == 1) { - value = cv::Scalar(value_[0]); - } else if (value_.size() == 2) { - value = cv::Scalar(value_[0], value_[1]); - } else if (value_.size() == 3) { - value = cv::Scalar(value_[0], value_[1], value_[2]); - } else { - value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); - } - - // top, bottom, left, right - cv::cuda::copyMakeBorder(*im, *im, 0, height_ - origin_h, 0, - width_ - origin_w, cv::BORDER_CONSTANT, value); - mat->SetHeight(height_); - mat->SetWidth(width_); - return true; -} -#endif - -bool PadToSize::Run(Mat* mat, int width, int height, - const std::vector& value, ProcLib lib) { - auto p = PadToSize(width, height, value); - return p(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/pad_to_size.h b/csrcs/fastdeploy/vision/common/processors/pad_to_size.h deleted file mode 100644 index ece0158f7..000000000 --- a/csrcs/fastdeploy/vision/common/processors/pad_to_size.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class PadToSize : public Processor { - public: - // only support pad with left-top padding mode - PadToSize(int width, int height, const std::vector& value) { - width_ = width; - height_ = height; - value_ = value; - } - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "PadToSize"; } - - static bool Run(Mat* mat, int width, int height, - const std::vector& value, - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - int width_; - int height_; - std::vector value_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/resize.cc b/csrcs/fastdeploy/vision/common/processors/resize.cc deleted file mode 100644 index d6b8b9e2f..000000000 --- a/csrcs/fastdeploy/vision/common/processors/resize.cc +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/resize.h" - -namespace fastdeploy { -namespace vision { - -bool Resize::CpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "Resize: The format of input is not HWC." << std::endl; - return false; - } - cv::Mat* im = mat->GetCpuMat(); - int origin_w = im->cols; - int origin_h = im->rows; - if (width_ > 0 && height_ > 0) { - if (use_scale_) { - float scale_w = width_ * 1.0 / origin_w; - float scale_h = height_ * 1.0 / origin_h; - cv::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_); - } else { - cv::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_); - } - } else if (scale_w_ > 0 && scale_h_ > 0) { - cv::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_); - } else { - FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " - "or (scale_w > 0 && scale_h > 0)." - << std::endl; - return false; - } - mat->SetWidth(im->cols); - mat->SetHeight(im->rows); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool Resize::GpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "Resize: The format of input is not HWC." << std::endl; - return false; - } - cv::cuda::GpuMat* im = mat->GetGpuMat(); - int origin_w = im->cols; - int origin_h = im->rows; - if (width_ > 0 && height_ > 0) { - if (use_scale_) { - float scale_w = width_ * 1.0 / origin_w; - float scale_h = height_ * 1.0 / origin_h; - cv::cuda::resize(*im, *im, cv::Size(0, 0), scale_w, scale_h, interp_); - } else { - cv::cuda::resize(*im, *im, cv::Size(width_, height_), 0, 0, interp_); - } - } else if (scale_w_ > 0 && scale_h_ > 0) { - cv::cuda::resize(*im, *im, cv::Size(0, 0), scale_w_, scale_h_, interp_); - } else { - FDERROR << "Resize: the parameters must satisfy (width > 0 && height > 0) " - "or (scale_w > 0 && scale_h > 0)." - << std::endl; - return false; - } - mat->SetWidth(im->cols); - mat->SetHeight(im->rows); - return true; -} -#endif - -bool Resize::Run(Mat* mat, int width, int height, float scale_w, float scale_h, - int interp, bool use_scale, ProcLib lib) { - if (mat->Height() == height && mat->Width() == width) { - return true; - } - auto r = Resize(width, height, scale_w, scale_h, interp, use_scale); - return r(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/resize.h b/csrcs/fastdeploy/vision/common/processors/resize.h deleted file mode 100644 index 5b6e9c025..000000000 --- a/csrcs/fastdeploy/vision/common/processors/resize.h +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class Resize : public Processor { - public: - Resize(int width, int height, float scale_w = -1.0, float scale_h = -1.0, - int interp = 1, bool use_scale = false) { - width_ = width; - height_ = height; - scale_w_ = scale_w; - scale_h_ = scale_h; - interp_ = interp; - use_scale_ = use_scale; - } - - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "Resize"; } - - static bool Run(Mat* mat, int width, int height, float scale_w = -1.0, - float scale_h = -1.0, int interp = 1, bool use_scale = false, - ProcLib lib = ProcLib::OPENCV_CPU); - - bool SetWidthAndHeight(int width, int height) { - width_ = width; - height_ = height; - return true; - } - - std::tuple GetWidthAndHeight() { - return std::make_tuple(width_, height_); - } - - private: - int width_; - int height_; - float scale_w_ = -1.0; - float scale_h_ = -1.0; - int interp_ = 1; - bool use_scale_ = false; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/resize_by_short.cc b/csrcs/fastdeploy/vision/common/processors/resize_by_short.cc deleted file mode 100644 index 8e850425f..000000000 --- a/csrcs/fastdeploy/vision/common/processors/resize_by_short.cc +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/resize_by_short.h" - -namespace fastdeploy { -namespace vision { - -bool ResizeByShort::CpuRun(Mat* mat) { - cv::Mat* im = mat->GetCpuMat(); - int origin_w = im->cols; - int origin_h = im->rows; - double scale = GenerateScale(origin_w, origin_h); - if (use_scale_) { - cv::resize(*im, *im, cv::Size(), scale, scale, interp_); - } else { - int width = static_cast(round(scale * im->cols)); - int height = static_cast(round(scale * im->rows)); - cv::resize(*im, *im, cv::Size(width, height), 0, 0, interp_); - } - mat->SetWidth(im->cols); - mat->SetHeight(im->rows); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool ResizeByShort::GpuRun(Mat* mat) { - cv::cuda::GpuMat* im = mat->GetGpuMat(); - int origin_w = im->cols; - int origin_h = im->rows; - double scale = GenerateScale(origin_w, origin_h); - im->convertTo(*im, CV_32FC(im->channels())); - if (use_scale_) { - cv::cuda::resize(*im, *im, cv::Size(), scale, scale, interp_); - } else { - int width = static_cast(round(scale * im->cols)); - int height = static_cast(round(scale * im->rows)); - cv::cuda::resize(*im, *im, cv::Size(width, height), 0, 0, interp_); - } - mat->SetWidth(im->cols); - mat->SetHeight(im->rows); - return true; -} -#endif - -double ResizeByShort::GenerateScale(const int origin_w, const int origin_h) { - int im_size_max = std::max(origin_w, origin_h); - int im_size_min = std::min(origin_w, origin_h); - double scale = - static_cast(target_size_) / static_cast(im_size_min); - if (max_size_ > 0) { - if (round(scale * im_size_max) > max_size_) { - scale = static_cast(max_size_) / static_cast(im_size_max); - } - } - return scale; -} - -bool ResizeByShort::Run(Mat* mat, int target_size, int interp, bool use_scale, - int max_size, ProcLib lib) { - auto r = ResizeByShort(target_size, interp, use_scale, max_size); - return r(mat, lib); -} -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/resize_by_short.h b/csrcs/fastdeploy/vision/common/processors/resize_by_short.h deleted file mode 100644 index 023748e9e..000000000 --- a/csrcs/fastdeploy/vision/common/processors/resize_by_short.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class ResizeByShort : public Processor { - public: - ResizeByShort(int target_size, int interp = 1, bool use_scale = true, - int max_size = -1) { - target_size_ = target_size; - max_size_ = max_size; - interp_ = interp; - use_scale_ = use_scale; - } - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "ResizeByShort"; } - - static bool Run(Mat* mat, int target_size, int interp = 1, - bool use_scale = true, int max_size = -1, - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - double GenerateScale(const int origin_w, const int origin_h); - int target_size_; - int max_size_; - int interp_; - bool use_scale_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/stride_pad.cc b/csrcs/fastdeploy/vision/common/processors/stride_pad.cc deleted file mode 100644 index 8597c8375..000000000 --- a/csrcs/fastdeploy/vision/common/processors/stride_pad.cc +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/common/processors/stride_pad.h" - -namespace fastdeploy { -namespace vision { - -bool StridePad::CpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "StridePad: The input data must be Layout::HWC format!" - << std::endl; - return false; - } - if (mat->Channels() > 4) { - FDERROR << "StridePad: Only support channels <= 4." << std::endl; - return false; - } - if (mat->Channels() != value_.size()) { - FDERROR - << "StridePad: Require input channels equals to size of padding value, " - "but now channels = " - << mat->Channels() << ", the size of padding values = " << value_.size() - << "." << std::endl; - return false; - } - int origin_w = mat->Width(); - int origin_h = mat->Height(); - - int pad_h = (mat->Height() / stride_) * stride_ + - (mat->Height() % stride_ != 0) * stride_ - mat->Height(); - int pad_w = (mat->Width() / stride_) * stride_ + - (mat->Width() % stride_ != 0) * stride_ - mat->Width(); - if (pad_h == 0 && pad_w == 0) { - return true; - } - cv::Mat* im = mat->GetCpuMat(); - cv::Scalar value; - if (value_.size() == 1) { - value = cv::Scalar(value_[0]); - } else if (value_.size() == 2) { - value = cv::Scalar(value_[0], value_[1]); - } else if (value_.size() == 3) { - value = cv::Scalar(value_[0], value_[1], value_[2]); - } else { - value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); - } - // top, bottom, left, right - cv::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, value); - mat->SetHeight(origin_h + pad_h); - mat->SetWidth(origin_w + pad_w); - return true; -} - -#ifdef ENABLE_OPENCV_CUDA -bool StridePad::GpuRun(Mat* mat) { - if (mat->layout != Layout::HWC) { - FDERROR << "StridePad: The input data must be Layout::HWC format!" - << std::endl; - return false; - } - if (mat->Channels() > 4) { - FDERROR << "StridePad: Only support channels <= 4." << std::endl; - return false; - } - if (mat->Channels() != value_.size()) { - FDERROR - << "StridePad: Require input channels equals to size of padding value, " - "but now channels = " - << mat->Channels() << ", the size of padding values = " << value_.size() - << "." << std::endl; - return false; - } - - int origin_w = mat->Width(); - int origin_h = mat->Height(); - int pad_h = (mat->Height() / stride_) * stride_ + - (mat->Height() % stride_ != 0) * stride_; - int pad_w = (mat->Width() / stride_) * stride_ + - (mat->Width() % stride_ != 0) * stride_; - if (pad_h == 0 && pad_w == 0) { - return true; - } - - cv::cuda::GpuMat* im = mat->GetGpuMat(); - cv::Scalar value; - if (value_.size() == 1) { - value = cv::Scalar(value_[0]); - } else if (value_.size() == 2) { - value = cv::Scalar(value_[0], value_[1]); - } else if (value_.size() == 3) { - value = cv::Scalar(value_[0], value_[1], value_[2]); - } else { - value = cv::Scalar(value_[0], value_[1], value_[2], value_[3]); - } - - // top, bottom, left, right - cv::cuda::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, - value); - mat->SetHeight(origin_h + pad_h); - mat->SetWidth(origin_w + pad_w); - return true; -} -#endif - -bool StridePad::Run(Mat* mat, int stride, const std::vector& value, - ProcLib lib) { - auto p = StridePad(stride, value); - return p(mat, lib); -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/stride_pad.h b/csrcs/fastdeploy/vision/common/processors/stride_pad.h deleted file mode 100644 index c002ca697..000000000 --- a/csrcs/fastdeploy/vision/common/processors/stride_pad.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/base.h" - -namespace fastdeploy { -namespace vision { - -class StridePad : public Processor { - public: - // only support pad with left-top padding mode - StridePad(int stride, const std::vector& value) { - stride_ = stride; - value_ = value; - } - bool CpuRun(Mat* mat); -#ifdef ENABLE_OPENCV_CUDA - bool GpuRun(Mat* mat); -#endif - std::string Name() { return "StridePad"; } - - static bool Run(Mat* mat, int stride, - const std::vector& value = std::vector(), - ProcLib lib = ProcLib::OPENCV_CPU); - - private: - int stride_ = 32; - std::vector value_; -}; -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/processors/transform.h b/csrcs/fastdeploy/vision/common/processors/transform.h deleted file mode 100644 index fed3d0c9a..000000000 --- a/csrcs/fastdeploy/vision/common/processors/transform.h +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/vision/common/processors/cast.h" -#include "fastdeploy/vision/common/processors/center_crop.h" -#include "fastdeploy/vision/common/processors/color_space_convert.h" -#include "fastdeploy/vision/common/processors/convert.h" -#include "fastdeploy/vision/common/processors/hwc2chw.h" -#include "fastdeploy/vision/common/processors/normalize.h" -#include "fastdeploy/vision/common/processors/pad.h" -#include "fastdeploy/vision/common/processors/pad_to_size.h" -#include "fastdeploy/vision/common/processors/resize.h" -#include "fastdeploy/vision/common/processors/resize_by_short.h" -#include "fastdeploy/vision/common/processors/stride_pad.h" diff --git a/csrcs/fastdeploy/vision/common/result.cc b/csrcs/fastdeploy/vision/common/result.cc deleted file mode 100644 index 854d6fcab..000000000 --- a/csrcs/fastdeploy/vision/common/result.cc +++ /dev/null @@ -1,306 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { - -void ClassifyResult::Clear() { - std::vector().swap(label_ids); - std::vector().swap(scores); -} - -std::string ClassifyResult::Str() { - std::string out; - out = "ClassifyResult(\nlabel_ids: "; - for (size_t i = 0; i < label_ids.size(); ++i) { - out = out + std::to_string(label_ids[i]) + ", "; - } - out += "\nscores: "; - for (size_t i = 0; i < label_ids.size(); ++i) { - out = out + std::to_string(scores[i]) + ", "; - } - out += "\n)"; - return out; -} - -DetectionResult::DetectionResult(const DetectionResult& res) { - boxes.assign(res.boxes.begin(), res.boxes.end()); - scores.assign(res.scores.begin(), res.scores.end()); - label_ids.assign(res.label_ids.begin(), res.label_ids.end()); -} - -void DetectionResult::Clear() { - std::vector>().swap(boxes); - std::vector().swap(scores); - std::vector().swap(label_ids); -} - -void DetectionResult::Reserve(int size) { - boxes.reserve(size); - scores.reserve(size); - label_ids.reserve(size); -} - -void DetectionResult::Resize(int size) { - boxes.resize(size); - scores.resize(size); - label_ids.resize(size); -} - -std::string DetectionResult::Str() { - std::string out; - out = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]\n"; - for (size_t i = 0; i < boxes.size(); ++i) { - out = out + std::to_string(boxes[i][0]) + "," + - std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + - ", " + std::to_string(boxes[i][3]) + ", " + - std::to_string(scores[i]) + ", " + std::to_string(label_ids[i]) + - "\n"; - } - return out; -} - -FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res) { - boxes.assign(res.boxes.begin(), res.boxes.end()); - landmarks.assign(res.landmarks.begin(), res.landmarks.end()); - scores.assign(res.scores.begin(), res.scores.end()); - landmarks_per_face = res.landmarks_per_face; -} - -void FaceDetectionResult::Clear() { - std::vector>().swap(boxes); - std::vector().swap(scores); - std::vector>().swap(landmarks); - landmarks_per_face = 0; -} - -void FaceDetectionResult::Reserve(int size) { - boxes.reserve(size); - scores.reserve(size); - if (landmarks_per_face > 0) { - landmarks.reserve(size * landmarks_per_face); - } -} - -void FaceDetectionResult::Resize(int size) { - boxes.resize(size); - scores.resize(size); - if (landmarks_per_face > 0) { - landmarks.resize(size * landmarks_per_face); - } -} - -std::string FaceDetectionResult::Str() { - std::string out; - // format without landmarks - if (landmarks_per_face <= 0) { - out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score]\n"; - for (size_t i = 0; i < boxes.size(); ++i) { - out = out + std::to_string(boxes[i][0]) + "," + - std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + - ", " + std::to_string(boxes[i][3]) + ", " + - std::to_string(scores[i]) + "\n"; - } - return out; - } - // format with landmarks - FDASSERT((landmarks.size() == boxes.size() * landmarks_per_face), - "The size of landmarks != boxes.size * landmarks_per_face."); - out = "FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x " + - std::to_string(landmarks_per_face) + "]\n"; - for (size_t i = 0; i < boxes.size(); ++i) { - out = out + std::to_string(boxes[i][0]) + "," + - std::to_string(boxes[i][1]) + ", " + std::to_string(boxes[i][2]) + - ", " + std::to_string(boxes[i][3]) + ", " + - std::to_string(scores[i]) + ", "; - for (size_t j = 0; j < landmarks_per_face; ++j) { - out = out + "(" + - std::to_string(landmarks[i * landmarks_per_face + j][0]) + "," + - std::to_string(landmarks[i * landmarks_per_face + j][1]); - if (j < landmarks_per_face - 1) { - out = out + "), "; - } else { - out = out + ")\n"; - } - } - } - return out; -} - -void SegmentationResult::Clear() { - std::vector().swap(label_map); - std::vector().swap(score_map); - std::vector().swap(shape); - contain_score_map = false; -} - -void SegmentationResult::Reserve(int size) { - label_map.reserve(size); - if (contain_score_map > 0) { - score_map.reserve(size); - } -} - -void SegmentationResult::Resize(int size) { - label_map.resize(size); - if (contain_score_map) { - score_map.resize(size); - } -} - -std::string SegmentationResult::Str() { - std::string out; - out = "SegmentationResult Image masks 10 rows x 10 cols: \n"; - for (size_t i = 0; i < 10; ++i) { - out += "["; - for (size_t j = 0; j < 10; ++j) { - out = out + std::to_string(label_map[i * 10 + j]) + ", "; - } - out += ".....]\n"; - } - out += "...........\n"; - if (contain_score_map) { - out += "SegmentationResult Score map 10 rows x 10 cols: \n"; - for (size_t i = 0; i < 10; ++i) { - out += "["; - for (size_t j = 0; j < 10; ++j) { - out = out + std::to_string(score_map[i * 10 + j]) + ", "; - } - out += ".....]\n"; - } - out += "...........\n"; - } - out += "result shape is: [" + std::to_string(shape[0]) + " " + - std::to_string(shape[1]) + "]"; - return out; -} - -FaceRecognitionResult::FaceRecognitionResult(const FaceRecognitionResult& res) { - embedding.assign(res.embedding.begin(), res.embedding.end()); -} - -void FaceRecognitionResult::Clear() { std::vector().swap(embedding); } - -void FaceRecognitionResult::Reserve(int size) { embedding.reserve(size); } - -void FaceRecognitionResult::Resize(int size) { embedding.resize(size); } - -std::string FaceRecognitionResult::Str() { - std::string out; - out = "FaceRecognitionResult: ["; - size_t numel = embedding.size(); - if (numel <= 0) { - return out + "Empty Result]"; - } - // max, min, mean - float min_val = embedding.at(0); - float max_val = embedding.at(0); - float total_val = embedding.at(0); - for (size_t i = 1; i < numel; ++i) { - float val = embedding.at(i); - total_val += val; - if (val < min_val) { - min_val = val; - } - if (val > max_val) { - max_val = val; - } - } - float mean_val = total_val / static_cast(numel); - out = out + "Dim(" + std::to_string(numel) + "), " + "Min(" + - std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) + - "), " + "Mean(" + std::to_string(mean_val) + ")]\n"; - return out; -} - -MattingResult::MattingResult(const MattingResult& res) { - alpha.assign(res.alpha.begin(), res.alpha.end()); - foreground.assign(res.foreground.begin(), res.foreground.end()); - shape.assign(res.shape.begin(), res.shape.end()); - contain_foreground = res.contain_foreground; -} - -void MattingResult::Clear() { - std::vector().swap(alpha); - std::vector().swap(foreground); - std::vector().swap(shape); - contain_foreground = false; -} - -void MattingResult::Reserve(int size) { - alpha.reserve(size); - if (contain_foreground) { - FDASSERT((shape.size() == 3), - "Please initial shape (h,w,c) before call Reserve."); - int c = static_cast(shape[3]); - foreground.reserve(size * c); - } -} - -void MattingResult::Resize(int size) { - alpha.resize(size); - if (contain_foreground) { - FDASSERT((shape.size() == 3), - "Please initial shape (h,w,c) before call Resize."); - int c = static_cast(shape[3]); - foreground.resize(size * c); - } -} - -std::string MattingResult::Str() { - std::string out; - out = "MattingResult["; - if (contain_foreground) { - out += "Foreground(true)"; - } else { - out += "Foreground(false)"; - } - out += ", Alpha("; - size_t numel = alpha.size(); - if (numel <= 0) { - return out + "[Empty Result]"; - } - // max, min, mean - float min_val = alpha.at(0); - float max_val = alpha.at(0); - float total_val = alpha.at(0); - for (size_t i = 1; i < numel; ++i) { - float val = alpha.at(i); - total_val += val; - if (val < min_val) { - min_val = val; - } - if (val > max_val) { - max_val = val; - } - } - float mean_val = total_val / static_cast(numel); - // shape - std::string shape_str = "Shape("; - for (size_t i = 0; i < shape.size(); ++i) { - if ((i + 1) != shape.size()) { - shape_str += std::to_string(shape[i]) + ","; - } else { - shape_str += std::to_string(shape[i]) + ")"; - } - } - out = out + "Numel(" + std::to_string(numel) + "), " + shape_str + ", Min(" + - std::to_string(min_val) + "), " + "Max(" + std::to_string(max_val) + - "), " + "Mean(" + std::to_string(mean_val) + "))]\n"; - return out; -} - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/common/result.h b/csrcs/fastdeploy/vision/common/result.h deleted file mode 100644 index f57178cee..000000000 --- a/csrcs/fastdeploy/vision/common/result.h +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "opencv2/core/core.hpp" - -namespace fastdeploy { -namespace vision { -enum FASTDEPLOY_DECL ResultType { - UNKNOWN_RESULT, - CLASSIFY, - DETECTION, - SEGMENTATION, - FACE_DETECTION, - FACE_RECOGNITION, - MATTING -}; - -struct FASTDEPLOY_DECL BaseResult { - ResultType type = ResultType::UNKNOWN_RESULT; -}; - -struct FASTDEPLOY_DECL ClassifyResult : public BaseResult { - std::vector label_ids; - std::vector scores; - ResultType type = ResultType::CLASSIFY; - - void Clear(); - std::string Str(); -}; - -struct FASTDEPLOY_DECL DetectionResult : public BaseResult { - // box: xmin, ymin, xmax, ymax - std::vector> boxes; - std::vector scores; - std::vector label_ids; - ResultType type = ResultType::DETECTION; - - DetectionResult() {} - DetectionResult(const DetectionResult& res); - - void Clear(); - - void Reserve(int size); - - void Resize(int size); - - std::string Str(); -}; - -struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult { - // box: xmin, ymin, xmax, ymax - std::vector> boxes; - // landmark: x, y, landmarks may empty if the - // model don't detect face with landmarks. - // Note, one face might have multiple landmarks, - // such as 5/19/21/68/98/..., etc. - std::vector> landmarks; - std::vector scores; - ResultType type = ResultType::FACE_DETECTION; - // set landmarks_per_face manually in your post processes. - int landmarks_per_face; - - FaceDetectionResult() { landmarks_per_face = 0; } - FaceDetectionResult(const FaceDetectionResult& res); - - void Clear(); - - void Reserve(int size); - - void Resize(int size); - - std::string Str(); -}; - -struct FASTDEPLOY_DECL SegmentationResult : public BaseResult { - // mask - std::vector label_map; - std::vector score_map; - std::vector shape; - bool contain_score_map = false; - - ResultType type = ResultType::SEGMENTATION; - - void Clear(); - - void Reserve(int size); - - void Resize(int size); - - std::string Str(); -}; - -struct FASTDEPLOY_DECL FaceRecognitionResult : public BaseResult { - // face embedding vector with 128/256/512 ... dim - std::vector embedding; - - ResultType type = ResultType::FACE_RECOGNITION; - - FaceRecognitionResult() {} - FaceRecognitionResult(const FaceRecognitionResult& res); - - void Clear(); - - void Reserve(int size); - - void Resize(int size); - - std::string Str(); -}; - -struct FASTDEPLOY_DECL MattingResult : public BaseResult { - // alpha matte and fgr (predicted foreground: HWC/BGR float32) - std::vector alpha; // h x w - std::vector foreground; // h x w x c (c=3 default) - // height, width, channel for foreground and alpha - // must be (h,w,c) and setup before Reserve and Resize - // c is only for foreground if contain_foreground is true. - std::vector shape; - bool contain_foreground = false; - - ResultType type = ResultType::MATTING; - - MattingResult() {} - MattingResult(const MattingResult& res); - - void Clear(); - - void Reserve(int size); - - void Resize(int size); - - std::string Str(); -}; - -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc b/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc deleted file mode 100644 index 267012c11..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.cc +++ /dev/null @@ -1,355 +0,0 @@ -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/nanodet_plus.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace detection { - -struct NanoDetPlusCenterPoint { - int grid0; - int grid1; - int stride; -}; - -void GenerateNanoDetPlusCenterPoints( - const std::vector& size, const std::vector& downsample_strides, - std::vector* center_points) { - // size: tuple of input (width, height), e.g (320, 320) - // downsample_strides: downsample strides in NanoDet and - // NanoDet-Plus, e.g (8, 16, 32, 64) - const int width = size[0]; - const int height = size[1]; - for (const auto& ds : downsample_strides) { - int num_grid_w = width / ds; - int num_grid_h = height / ds; - for (int g1 = 0; g1 < num_grid_h; ++g1) { - for (int g0 = 0; g0 < num_grid_w; ++g0) { - (*center_points).emplace_back(NanoDetPlusCenterPoint{g0, g1, ds}); - } - } - } -} - -void WrapAndResize(Mat* mat, std::vector size, std::vector color, - bool keep_ratio = false) { - // Reference: nanodet/data/transform/warp.py#L139 - // size: tuple of input (width, height) - // The default value of `keep_ratio` is `fasle` in - // `config/nanodet-plus-m-1.5x_320.yml` for both - // train and val processes. So, we just let this - // option default `false` according to the official - // implementation in NanoDet and NanoDet-Plus. - // Note, this function will apply a normal resize - // operation to input Mat if the keep_ratio option - // is fasle and the behavior will be the same as - // yolov5's letterbox if keep_ratio is true. - - // with keep_ratio = false (default) - if (!keep_ratio) { - int resize_h = size[1]; - int resize_w = size[0]; - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - return; - } - // with keep_ratio = true, same as yolov5's letterbox - float r = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - - int resize_h = int(round(static_cast(mat->Height()) * r)); - int resize_w = int(round(static_cast(mat->Width()) * r)); - - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -void GFLRegression(const float* logits, size_t reg_num, float* offset) { - // Hint: reg_num = reg_max + 1 - FDASSERT(((nullptr != logits) && (reg_num != 0)), - "NanoDetPlus: logits is nullptr or reg_num is 0 in GFLRegression."); - // softmax - float total_exp = 0.f; - std::vector softmax_probs(reg_num); - for (size_t i = 0; i < reg_num; ++i) { - softmax_probs[i] = std::exp(logits[i]); - total_exp += softmax_probs[i]; - } - for (size_t i = 0; i < reg_num; ++i) { - softmax_probs[i] = softmax_probs[i] / total_exp; - } - // gfl regression -> offset - for (size_t i = 0; i < reg_num; ++i) { - (*offset) += static_cast(i) * softmax_probs[i]; - } -} - -NanoDetPlus::NanoDetPlus(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool NanoDetPlus::Initialize() { - // parameters for preprocess - size = {320, 320}; - padding_value = {0.0f, 0.0f, 0.0f}; - keep_ratio = false; - downsample_strides = {8, 16, 32, 64}; - max_wh = 4096.0f; - reg_max = 7; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - return true; -} - -bool NanoDetPlus::Preprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { - // NanoDet-Plus preprocess steps - // 1. WrapAndResize - // 2. HWC->CHW - // 3. Normalize or Convert (keep BGR order) - WrapAndResize(mat, size, padding_value, keep_ratio); - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - // Compute `result = mat * alpha + beta` directly by channel - // Reference: /config/nanodet-plus-m-1.5x_320.yml#L89 - // from mean: [103.53, 116.28, 123.675], std: [57.375, 57.12, 58.395] - // x' = (x - mean) / std to x'= x * alpha + beta. - // e.g alpha[0] = 0.017429f = 1.0f / 57.375f - // e.g beta[0] = -103.53f * 0.0174291f - std::vector alpha = {0.017429f, 0.017507f, 0.017125f}; - std::vector beta = {-103.53f * 0.0174291f, -116.28f * 0.0175070f, - -123.675f * 0.0171247f}; // BGR order - Convert::Run(mat, alpha, beta); - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool NanoDetPlus::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - // generate center points with dowmsample strides - std::vector center_points; - GenerateNanoDetPlusCenterPoints(size, downsample_strides, ¢er_points); - - // infer_result shape might look like (1,2125,112) - const int num_cls_reg = infer_result.shape[2]; // e.g 112 - const int num_classes = num_cls_reg - (reg_max + 1) * 4; // e.g 80 - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - float* scores = data + i * num_cls_reg; - float* max_class_score = std::max_element(scores, scores + num_classes); - float confidence = (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(scores, max_class_score); - // fetch i-th center point - float grid0 = static_cast(center_points.at(i).grid0); - float grid1 = static_cast(center_points.at(i).grid1); - float downsample_stride = static_cast(center_points.at(i).stride); - // apply gfl regression to get offsets (l,t,r,b) - float* logits = data + i * num_cls_reg + num_classes; // 32|44... - std::vector offsets(4); - for (size_t j = 0; j < 4; ++j) { - GFLRegression(logits + j * (reg_max + 1), reg_max + 1, &offsets[j]); - } - // convert from offsets to [x1, y1, x2, y2] - float l = offsets[0]; // left - float t = offsets[1]; // top - float r = offsets[2]; // right - float b = offsets[3]; // bottom - - float x1 = (grid0 - l) * downsample_stride; // cx - l x1 - float y1 = (grid1 - t) * downsample_stride; // cy - t y1 - float x2 = (grid0 + r) * downsample_stride; // cx + r x2 - float y2 = (grid1 + b) * downsample_stride; // cy + b y2 - - result->boxes.emplace_back( - std::array{x1 + label_id * max_wh, y1 + label_id * max_wh, - x2 + label_id * max_wh, y2 + label_id * max_wh}); - // label_id * max_wh for multi classes NMS - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - // without keep_ratio - if (!keep_ratio) { - // x' = (x / out_w) * ipt_w = x / (out_w / ipt_w) - // y' = (y / out_h) * ipt_h = y / (out_h / ipt_h) - float r_w = out_w / ipt_w; - float r_h = out_h / ipt_h; - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max(result->boxes[i][0] / r_w, 0.0f); - result->boxes[i][1] = std::max(result->boxes[i][1] / r_h, 0.0f); - result->boxes[i][2] = std::max(result->boxes[i][2] / r_w, 0.0f); - result->boxes[i][3] = std::max(result->boxes[i][3] / r_h, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; - } - // with keep_ratio - float r = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * r) / 2; - float pad_w = (out_w - ipt_w * r) / 2; - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / r, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / r, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / r, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / r, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool NanoDetPlus::Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold, float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h b/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h deleted file mode 100644 index a407b8715..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus.h +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace detection { - -class FASTDEPLOY_DECL NanoDetPlus : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - NanoDetPlus(const std::string& model_file, - const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "nanodet"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.35f, - float nms_iou_threshold = 0.5f); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of input size (width, height), e.g (320, 320) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // keep aspect ratio or not when perform resize operation. - // This option is set as `false` by default in NanoDet-Plus. - bool keep_ratio; - // downsample strides for NanoDet-Plus to generate anchors, will - // take (8, 16, 32, 64) as default values. - std::vector downsample_strides; - // for offseting the boxes by classes when using NMS, default 4096. - float max_wh; - // reg_max for GFL regression, default 7 - int reg_max; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // RangiLyu/nanodet official 'export_onnx.py' script will export static ONNX - // by default. - // This value will auto check by fastdeploy after the internal Runtime - // initialized. - bool is_dynamic_input_; -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc deleted file mode 100644 index b415c0b3b..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/nanodet_plus_pybind.cc +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindNanoDetPlus(pybind11::module& m) { - pybind11::class_( - m, "NanoDetPlus") - .def(pybind11::init()) - .def("predict", - [](vision::detection::NanoDetPlus& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::NanoDetPlus::size) - .def_readwrite("padding_value", - &vision::detection::NanoDetPlus::padding_value) - .def_readwrite("keep_ratio", &vision::detection::NanoDetPlus::keep_ratio) - .def_readwrite("downsample_strides", - &vision::detection::NanoDetPlus::downsample_strides) - .def_readwrite("max_wh", &vision::detection::NanoDetPlus::max_wh) - .def_readwrite("reg_max", &vision::detection::NanoDetPlus::reg_max); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc b/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc deleted file mode 100644 index dff2118f3..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.cc +++ /dev/null @@ -1,255 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/scaledyolov4.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -void ScaledYOLOv4::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -ScaledYOLOv4::ScaledYOLOv4(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER}; - valid_gpu_backends = {Backend::PDINFER}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool ScaledYOLOv4::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool ScaledYOLOv4::Preprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // ScaledYOLOv4's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - ScaledYOLOv4::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool ScaledYOLOv4::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold, float nms_iou_threshold) { - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h b/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h deleted file mode 100644 index bb7ff0a28..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4.h +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL ScaledYOLOv4 : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - ScaledYOLOv4(const std::string& model_file, - const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - virtual std::string ModelName() const { return "ScaledYOLOv4"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc deleted file mode 100644 index 3e8e43b9e..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/scaledyolov4_pybind.cc +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindScaledYOLOv4(pybind11::module& m) { - pybind11::class_( - m, "ScaledYOLOv4") - .def(pybind11::init()) - .def("predict", - [](vision::detection::ScaledYOLOv4& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::ScaledYOLOv4::size) - .def_readwrite("padding_value", - &vision::detection::ScaledYOLOv4::padding_value) - .def_readwrite("is_mini_pad", - &vision::detection::ScaledYOLOv4::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::ScaledYOLOv4::is_no_pad) - .def_readwrite("is_scale_up", - &vision::detection::ScaledYOLOv4::is_scale_up) - .def_readwrite("stride", &vision::detection::ScaledYOLOv4::stride) - .def_readwrite("max_wh", &vision::detection::ScaledYOLOv4::max_wh); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolor.cc b/csrcs/fastdeploy/vision/detection/contrib/yolor.cc deleted file mode 100644 index 5e6fa2fdd..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolor.cc +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/yolor.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -void YOLOR::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOR::YOLOR(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER}; - valid_gpu_backends = {Backend::PDINFER}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOR::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool YOLOR::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolor's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - YOLOR::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOR::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOR::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolor.h b/csrcs/fastdeploy/vision/detection/contrib/yolor.h deleted file mode 100644 index 2de7a456f..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolor.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL YOLOR : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - YOLOR(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - virtual std::string ModelName() const { return "YOLOR"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc deleted file mode 100644 index 0e0a21ca5..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolor_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOR(pybind11::module& m) { - pybind11::class_(m, "YOLOR") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOR& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::YOLOR::size) - .def_readwrite("padding_value", &vision::detection::YOLOR::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOR::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOR::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOR::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOR::stride) - .def_readwrite("max_wh", &vision::detection::YOLOR::max_wh); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5.cc deleted file mode 100644 index 306051e80..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov5.cc +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/yolov5.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -void YOLOv5::LetterBox(Mat* mat, std::vector size, - std::vector color, bool _auto, bool scale_fill, - bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - Resize::Run(mat, resize_w, resize_h); - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOv5::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - multi_label = true; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool YOLOv5::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - double ratio = (size[0] * 1.0) / std::max(static_cast(mat->Height()), - static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov5's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, - stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOv5::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, bool multi_label) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - if (multi_label) { - result->Reserve(infer_result.shape[1] * (infer_result.shape[2] - 5)); - } else { - result->Reserve(infer_result.shape[1]); - } - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - if (multi_label) { - for (size_t j = 5; j < infer_result.shape[2]; ++j) { - confidence = data[s + 4]; - float* class_score = data + s + j; - confidence *= (*class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, class_score); - - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - } else { - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - } - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < result->boxes.size(); ++i) { - float pad_h = (out_h - ipt_h * scale) / 2; - float pad_w = (out_w - ipt_w * scale) / 2; - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h); - } - return true; -} - -bool YOLOv5::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold, multi_label)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5.h b/csrcs/fastdeploy/vision/detection/contrib/yolov5.h deleted file mode 100644 index 68c910d23..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov5.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL YOLOv5 : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - YOLOv5(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "yolov5"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - // for different strategies to get boxes when postprocessing - bool multi_label; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* outputs, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - // multi_label 后处理时box选取是否采用多标签方式 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold, - bool multi_label); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - void LetterBox(Mat* mat, std::vector size, std::vector color, - bool _auto, bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // YOLOv5 official 'export_onnx.py' script will export dynamic ONNX by - // default. - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc deleted file mode 100644 index 65ba538b8..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov5_pybind.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv5(pybind11::module& m) { - pybind11::class_(m, "YOLOv5") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv5& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::YOLOv5::size) - .def_readwrite("padding_value", &vision::detection::YOLOv5::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv5::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOv5::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOv5::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOv5::stride) - .def_readwrite("max_wh", &vision::detection::YOLOv5::max_wh) - .def_readwrite("multi_label", &vision::detection::YOLOv5::multi_label); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc deleted file mode 100644 index 26ca15f1e..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.cc +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/yolov5lite.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -void YOLOv5Lite::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -void YOLOv5Lite::GenerateAnchors(const std::vector& size, - const std::vector& downsample_strides, - std::vector* anchors, - int num_anchors) { - // size: tuple of input (width, height) - // downsample_strides: downsample strides in YOLOv5Lite, e.g (8,16,32) - const int width = size[0]; - const int height = size[1]; - for (int i = 0; i < downsample_strides.size(); ++i) { - const int ds = downsample_strides[i]; - int num_grid_w = width / ds; - int num_grid_h = height / ds; - for (int an = 0; an < num_anchors; ++an) { - float anchor_w = anchor_config[i][an * 2]; - float anchor_h = anchor_config[i][an * 2 + 1]; - for (int g1 = 0; g1 < num_grid_h; ++g1) { - for (int g0 = 0; g0 < num_grid_w; ++g0) { - (*anchors).emplace_back(Anchor{g0, g1, ds, anchor_w, anchor_h}); - } - } - } - } -} - -YOLOv5Lite::YOLOv5Lite(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOv5Lite::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - downsample_strides = {8, 16, 32}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - is_decode_exported = false; - anchor_config = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, - {30.0, 61.0, 62.0, 45.0, 59.0, 119.0}, - {116.0, 90.0, 156.0, 198.0, 373.0, 326.0}}; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool YOLOv5Lite::Preprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov5lite's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - YOLOv5Lite::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOv5Lite::PostprocessWithDecode( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - // generate anchors with dowmsample strides - std::vector anchors; - int num_anchors = anchor_config[0].size() / 2; - GenerateAnchors(size, downsample_strides, &anchors, num_anchors); - // infer_result shape might look like (1,n,85=5+80) - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // fetch i-th anchor - float grid0 = static_cast(anchors.at(i).grid0); - float grid1 = static_cast(anchors.at(i).grid1); - float downsample_stride = static_cast(anchors.at(i).stride); - float anchor_w = static_cast(anchors.at(i).anchor_w); - float anchor_h = static_cast(anchors.at(i).anchor_h); - // convert from offsets to [x, y, w, h] - float dx = data[s]; - float dy = data[s + 1]; - float dw = data[s + 2]; - float dh = data[s + 3]; - - float x = (dx * 2.0f - 0.5f + grid0) * downsample_stride; - float y = (dy * 2.0f - 0.5f + grid1) * downsample_stride; - float w = std::pow(dw * 2.0f, 2.0f) * anchor_w; - float h = std::pow(dh * 2.0f, 2.0f) * anchor_h; - - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh, - x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh}); - // label_id * max_wh for multi classes NMS - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv5Lite::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv5Lite::Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold, float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (is_decode_exported) { - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - } else { - if (!PostprocessWithDecode(output_tensors[0], result, im_info, - conf_threshold, nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h b/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h deleted file mode 100644 index 2add202f4..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL YOLOv5Lite : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - YOLOv5Lite(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - virtual std::string ModelName() const { return "YOLOv5-Lite"; } - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.45, - float nms_iou_threshold = 0.25); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - // downsample strides for YOLOv5Lite to generate anchors, will take - // (8,16,32) as default values, might have stride=64. - std::vector downsample_strides; - // anchors parameters, downsample_strides will take - // (8,16,32), each stride has three anchors with width and hight. - std::vector> anchor_config; - // whether the model_file was exported with decode module. The official - // YOLOv5Lite/export.py script will export ONNX file without - // decode module. Please set it 'true' manually if the model file - // was exported with decode module. - // false : ONNX files without decode module. - // true : ONNX file with decode module. - bool is_decode_exported; - - private: - // necessary parameters for GenerateAnchors to generate anchors when ONNX file - // without decode module. - struct Anchor { - int grid0; - int grid1; - int stride; - float anchor_w; - float anchor_h; - }; - - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // YOLOv5Lite的官方脚本默认导出不带decode模块的模型文件 需要在后处理进行decode - // the official YOLOv5Lite/export.py will export ONNX file without decode - // module. - // this fuction support the postporocess for ONNX file without decode module. - // set the `is_decode_exported = false`, this function will work. - bool PostprocessWithDecode( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - // generate anchors for decodeing when ONNX file without decode module. - void GenerateAnchors(const std::vector& size, - const std::vector& downsample_strides, - std::vector* anchors, const int num_anchors = 3); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc deleted file mode 100644 index dd064e3be..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov5lite_pybind.cc +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv5Lite(pybind11::module& m) { - pybind11::class_(m, - "YOLOv5Lite") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv5Lite& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::YOLOv5Lite::size) - .def_readwrite("padding_value", - &vision::detection::YOLOv5Lite::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv5Lite::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOv5Lite::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOv5Lite::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOv5Lite::stride) - .def_readwrite("max_wh", &vision::detection::YOLOv5Lite::max_wh) - .def_readwrite("anchor_config", - &vision::detection::YOLOv5Lite::anchor_config) - .def_readwrite("is_decode_exported", - &vision::detection::YOLOv5Lite::is_decode_exported); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov6.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov6.cc deleted file mode 100644 index 7c6827433..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov6.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/yolov6.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace detection { - -void YOLOv6::LetterBox(Mat* mat, std::vector size, - std::vector color, bool _auto, bool scale_fill, - bool scale_up, int stride) { - float scale = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(static_cast(mat->Height()) * scale)); - int resize_w = int(round(static_cast(mat->Width()) * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOv6::YOLOv6(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOv6::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 4096.0f; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool YOLOv6::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(round(static_cast(mat->Height()) * ratio)); - int resize_w = int(round(static_cast(mat->Width()) * ratio)); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov6's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, - stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOv6::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < result->boxes.size(); ++i) { - float pad_h = (out_h - ipt_h * scale) / 2; - float pad_w = (out_w - ipt_w * scale) / 2; - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv6::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov6.h b/csrcs/fastdeploy/vision/detection/contrib/yolov6.h deleted file mode 100644 index 64af6e2eb..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov6.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace detection { - -class FASTDEPLOY_DECL YOLOv6 : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - YOLOv6(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "YOLOv6"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS, default 4096 in - // meituan/YOLOv6 - float max_wh; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* outputs, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - void LetterBox(Mat* mat, std::vector size, std::vector color, - bool _auto, bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // meituan/YOLOv6 official 'export_onnx.py' script will export static ONNX by - // default. - // while is_dynamic_input if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc deleted file mode 100644 index a1d0131df..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov6_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv6(pybind11::module& m) { - pybind11::class_(m, "YOLOv6") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv6& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::YOLOv6::size) - .def_readwrite("padding_value", &vision::detection::YOLOv6::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv6::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOv6::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOv6::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOv6::stride) - .def_readwrite("max_wh", &vision::detection::YOLOv6::max_wh); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov7.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov7.cc deleted file mode 100644 index edc1b9048..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov7.cc +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/yolov7.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -void YOLOv7::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER}; - valid_gpu_backends = {Backend::PDINFER}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - initialized = Initialize(); -} - -bool YOLOv7::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - max_wh = 7680.0; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool YOLOv7::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov7's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - YOLOv7::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOv7::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv7::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } - - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov7.h b/csrcs/fastdeploy/vision/detection/contrib/yolov7.h deleted file mode 100644 index 02b874b2c..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov7.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL YOLOv7 : public FastDeployModel { - public: - YOLOv7(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - virtual std::string ModelName() const { return "yolov7"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - float max_wh; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc deleted file mode 100644 index bf196fa9f..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolov7_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv7(pybind11::module& m) { - pybind11::class_(m, "YOLOv7") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOv7& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::YOLOv7::size) - .def_readwrite("padding_value", &vision::detection::YOLOv7::padding_value) - .def_readwrite("is_mini_pad", &vision::detection::YOLOv7::is_mini_pad) - .def_readwrite("is_no_pad", &vision::detection::YOLOv7::is_no_pad) - .def_readwrite("is_scale_up", &vision::detection::YOLOv7::is_scale_up) - .def_readwrite("stride", &vision::detection::YOLOv7::stride) - .def_readwrite("max_wh", &vision::detection::YOLOv7::max_wh); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolox.cc b/csrcs/fastdeploy/vision/detection/contrib/yolox.cc deleted file mode 100644 index 5d3880657..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolox.cc +++ /dev/null @@ -1,339 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/contrib/yolox.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace detection { - -struct YOLOXAnchor { - int grid0; - int grid1; - int stride; -}; - -void GenerateYOLOXAnchors(const std::vector& size, - const std::vector& downsample_strides, - std::vector* anchors) { - // size: tuple of input (width, height) - // downsample_strides: downsample strides in YOLOX, e.g (8,16,32) - const int width = size[0]; - const int height = size[1]; - for (const auto& ds : downsample_strides) { - int num_grid_w = width / ds; - int num_grid_h = height / ds; - for (int g1 = 0; g1 < num_grid_h; ++g1) { - for (int g0 = 0; g0 < num_grid_w; ++g0) { - (*anchors).emplace_back(YOLOXAnchor{g0, g1, ds}); - } - } - } -} - -void LetterBoxWithRightBottomPad(Mat* mat, std::vector size, - std::vector color) { - // specific pre process for YOLOX, not the same as YOLOv5 - // reference: YOLOX/yolox/data/data_augment.py#L142 - float r = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - - int resize_h = int(round(static_cast(mat->Height()) * r)); - int resize_w = int(round(static_cast(mat->Width()) * r)); - - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - // right-bottom padding for YOLOX - if (pad_h > 0 || pad_w > 0) { - int top = 0; - int left = 0; - int right = pad_w; - int bottom = pad_h; - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOX::YOLOX(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOX::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - downsample_strides = {8, 16, 32}; - max_wh = 4096.0f; - is_decode_exported = false; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - return true; -} - -bool YOLOX::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // YOLOX ( >= v0.1.1) preprocess steps - // 1. preproc - // 2. HWC->CHW - // 3. NO!!! BRG2GRB and Normalize needed in YOLOX - LetterBoxWithRightBottomPad(mat, size, padding_value); - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOX::Postprocess( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - data[s] - data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] - data[s + 3] / 2.0f + label_id * max_wh, - data[s + 0] + data[s + 2] / 2.0f + label_id * max_wh, - data[s + 1] + data[s + 3] / 2.0f + label_id * max_wh}); - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float r = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f); - result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f); - result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f); - result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOX::PostprocessWithDecode( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - result->Clear(); - result->Reserve(infer_result.shape[1]); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - // generate anchors with dowmsample strides - std::vector anchors; - GenerateYOLOXAnchors(size, downsample_strides, &anchors); - - // infer_result shape might look like (1,n,85=5+80) - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - int s = i * infer_result.shape[2]; - float confidence = data[s + 4]; - float* max_class_score = - std::max_element(data + s + 5, data + s + infer_result.shape[2]); - confidence *= (*max_class_score); - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - int32_t label_id = std::distance(data + s + 5, max_class_score); - // fetch i-th anchor - float grid0 = static_cast(anchors.at(i).grid0); - float grid1 = static_cast(anchors.at(i).grid1); - float downsample_stride = static_cast(anchors.at(i).stride); - // convert from offsets to [x, y, w, h] - float dx = data[s]; - float dy = data[s + 1]; - float dw = data[s + 2]; - float dh = data[s + 3]; - - float x = (dx + grid0) * downsample_stride; - float y = (dy + grid1) * downsample_stride; - float w = std::exp(dw) * downsample_stride; - float h = std::exp(dh) * downsample_stride; - - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - x - w / 2.0f + label_id * max_wh, y - h / 2.0f + label_id * max_wh, - x + w / 2.0f + label_id * max_wh, y + h / 2.0f + label_id * max_wh}); - // label_id * max_wh for multi classes NMS - result->label_ids.push_back(label_id); - result->scores.push_back(confidence); - } - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float r = std::min(out_h / ipt_h, out_w / ipt_w); - for (size_t i = 0; i < result->boxes.size(); ++i) { - int32_t label_id = (result->label_ids)[i]; - // clip box - result->boxes[i][0] = result->boxes[i][0] - max_wh * label_id; - result->boxes[i][1] = result->boxes[i][1] - max_wh * label_id; - result->boxes[i][2] = result->boxes[i][2] - max_wh * label_id; - result->boxes[i][3] = result->boxes[i][3] - max_wh * label_id; - result->boxes[i][0] = std::max(result->boxes[i][0] / r, 0.0f); - result->boxes[i][1] = std::max(result->boxes[i][1] / r, 0.0f); - result->boxes[i][2] = std::max(result->boxes[i][2] / r, 0.0f); - result->boxes[i][3] = std::max(result->boxes[i][3] / r, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool YOLOX::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold, - float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (is_decode_exported) { - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - } else { - if (!PostprocessWithDecode(output_tensors[0], result, im_info, - conf_threshold, nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolox.h b/csrcs/fastdeploy/vision/detection/contrib/yolox.h deleted file mode 100644 index fc27ca1ed..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolox.h +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace detection { - -class FASTDEPLOY_DECL YOLOX : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - YOLOX(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "YOLOX"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, DetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // whether the model_file was exported with decode module. The official - // YOLOX/tools/export_onnx.py script will export ONNX file without - // decode module. Please set it 'true' manually if the model file - // was exported with decode module. - bool is_decode_exported; - // downsample strides for YOLOX to generate anchors, will take - // (8,16,32) as default values, might have stride=64. - std::vector downsample_strides; - // for offseting the boxes by classes when using NMS, default 4096. - float max_wh; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* outputs, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // YOLOX的官方脚本默认导出不带decode模块的模型文件 需要在后处理进行decode - bool PostprocessWithDecode( - FDTensor& infer_result, DetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - // whether to inference with dynamic shape (e.g ONNX export with dynamic shape - // or not.) - // megvii/YOLOX official 'export_onnx.py' script will export static ONNX by - // default. - // while is_dynamic_shape if 'false', is_mini_pad will force 'false'. This - // value will - // auto check by fastdeploy after the internal Runtime already initialized. - bool is_dynamic_input_; -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc b/csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc deleted file mode 100644 index 68cb6a426..000000000 --- a/csrcs/fastdeploy/vision/detection/contrib/yolox_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOX(pybind11::module& m) { - pybind11::class_(m, "YOLOX") - .def(pybind11::init()) - .def("predict", - [](vision::detection::YOLOX& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::detection::YOLOX::size) - .def_readwrite("padding_value", &vision::detection::YOLOX::padding_value) - .def_readwrite("is_decode_exported", - &vision::detection::YOLOX::is_decode_exported) - .def_readwrite("downsample_strides", - &vision::detection::YOLOX::downsample_strides) - .def_readwrite("max_wh", &vision::detection::YOLOX::max_wh); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/detection_pybind.cc b/csrcs/fastdeploy/vision/detection/detection_pybind.cc deleted file mode 100644 index a865dc11e..000000000 --- a/csrcs/fastdeploy/vision/detection/detection_pybind.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindYOLOv7(pybind11::module& m); -void BindScaledYOLOv4(pybind11::module& m); -void BindYOLOR(pybind11::module& m); -void BindYOLOv6(pybind11::module& m); -void BindYOLOv5Lite(pybind11::module& m); -void BindYOLOv5(pybind11::module& m); -void BindYOLOX(pybind11::module& m); -void BindNanoDetPlus(pybind11::module& m); -void BindPPDet(pybind11::module& m); - -void BindDetection(pybind11::module& m) { - auto detection_module = - m.def_submodule("detection", "Image object detection models."); - BindPPDet(detection_module); - BindYOLOv7(detection_module); - BindScaledYOLOv4(detection_module); - BindYOLOR(detection_module); - BindYOLOv6(detection_module); - BindYOLOv5Lite(detection_module); - BindYOLOv5(detection_module); - BindYOLOX(detection_module); - BindNanoDetPlus(detection_module); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/model.h b/csrcs/fastdeploy/vision/detection/ppdet/model.h deleted file mode 100644 index f40c6b7fe..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/model.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/vision/detection/ppdet/picodet.h" -#include "fastdeploy/vision/detection/ppdet/ppyolo.h" -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" -#include "fastdeploy/vision/detection/ppdet/rcnn.h" -#include "fastdeploy/vision/detection/ppdet/yolov3.h" -#include "fastdeploy/vision/detection/ppdet/yolox.h" diff --git a/csrcs/fastdeploy/vision/detection/ppdet/picodet.cc b/csrcs/fastdeploy/vision/detection/ppdet/picodet.cc deleted file mode 100644 index d89fab2ae..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/picodet.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/ppdet/picodet.h" -#include "yaml-cpp/yaml.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -PicoDet::PicoDet(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT}; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - background_label = -1; - keep_top_k = 100; - nms_eta = 1; - nms_threshold = 0.6; - nms_top_k = 1000; - normalized = true; - score_threshold = 0.025; - CheckIfContainDecodeAndNMS(); - initialized = Initialize(); -} - -bool PicoDet::CheckIfContainDecodeAndNMS() { - YAML::Node cfg; - try { - cfg = YAML::LoadFile(config_file_); - } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file_ - << ", maybe you should check this file." << std::endl; - return false; - } - - if (cfg["arch"].as() == "PicoDet") { - FDERROR << "The arch in config file is PicoDet, which means this model " - "doesn contain box decode and nms, please export model with " - "decode and nms." - << std::endl; - return false; - } - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/picodet.h b/csrcs/fastdeploy/vision/detection/ppdet/picodet.h deleted file mode 100644 index 984e56222..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/picodet.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL PicoDet : public PPYOLOE { - public: - PicoDet(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - // Only support picodet contains decode and nms - bool CheckIfContainDecodeAndNMS(); - - virtual std::string ModelName() const { return "PicoDet"; } -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc b/csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc deleted file mode 100644 index 2f4b0fefc..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/ppdet_pybind.cc +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindPPDet(pybind11::module& m) { - pybind11::class_(m, - "PPYOLOE") - .def(pybind11::init()) - .def("predict", [](vision::detection::PPYOLOE& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); - - pybind11::class_(m, - "PPYOLO") - .def(pybind11::init()) - .def("predict", [](vision::detection::PPYOLO& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); - - pybind11::class_(m, - "PPYOLOv2") - .def(pybind11::init()) - .def("predict", [](vision::detection::PPYOLOv2& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); - - pybind11::class_(m, - "PicoDet") - .def(pybind11::init()) - .def("predict", [](vision::detection::PicoDet& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); - - pybind11::class_(m, "PaddleYOLOX") - .def(pybind11::init()) - .def("predict", [](vision::detection::PaddleYOLOX& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); - - pybind11::class_(m, - "FasterRCNN") - .def(pybind11::init()) - .def("predict", - [](vision::detection::FasterRCNN& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); - - pybind11::class_(m, - "YOLOv3") - .def(pybind11::init()) - .def("predict", [](vision::detection::YOLOv3& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::DetectionResult res; - self.Predict(&mat, &res); - return res; - }); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc b/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc deleted file mode 100644 index 6c202f0d0..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.cc +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/ppdet/ppyolo.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -PPYOLO::PPYOLO(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER}; - valid_gpu_backends = {Backend::PDINFER}; - has_nms_ = true; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool PPYOLO::Initialize() { - if (!BuildPreprocessPipelineFromConfig()) { - FDERROR << "Failed to build preprocess pipeline from configuration file." - << std::endl; - return false; - } - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - return true; -} - -bool PPYOLO::Preprocess(Mat* mat, std::vector* outputs) { - int origin_w = mat->Width(); - int origin_h = mat->Height(); - for (size_t i = 0; i < processors_.size(); ++i) { - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - } - - outputs->resize(3); - (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, "im_shape"); - (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, "scale_factor"); - float* ptr0 = static_cast((*outputs)[0].MutableData()); - ptr0[0] = mat->Height(); - ptr0[1] = mat->Width(); - float* ptr2 = static_cast((*outputs)[2].MutableData()); - ptr2[0] = mat->Height() * 1.0 / origin_h; - ptr2[1] = mat->Width() * 1.0 / origin_w; - (*outputs)[1].name = "image"; - mat->ShareWithTensor(&((*outputs)[1])); - // reshape to [1, c, h, w] - (*outputs)[1].shape.insert((*outputs)[1].shape.begin(), 1); - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h b/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h deleted file mode 100644 index 1b3b48780..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/ppyolo.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL PPYOLO : public PPYOLOE { - public: - PPYOLO(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - virtual std::string ModelName() const { return "PaddleDetection/PPYOLO"; } - - virtual bool Preprocess(Mat* mat, std::vector* outputs); - virtual bool Initialize(); - - protected: - PPYOLO() {} -}; - -class FASTDEPLOY_DECL PPYOLOv2 : public PPYOLO { - public: - PPYOLOv2(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE) : PPYOLO(model_file, params_file, config_file, custom_option, model_format) { - } - - virtual std::string ModelName() const { return "PaddleDetection/PPYOLOv2"; } -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc b/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc deleted file mode 100644 index 2e4b56ecb..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.cc +++ /dev/null @@ -1,258 +0,0 @@ -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" -#include "fastdeploy/vision/utils/utils.h" -#include "yaml-cpp/yaml.h" -#ifdef ENABLE_PADDLE_FRONTEND -#include "paddle2onnx/converter.h" -#endif - -namespace fastdeploy { -namespace vision { -namespace detection { - -PPYOLOE::PPYOLOE(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT}; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -void PPYOLOE::GetNmsInfo() { - if (runtime_option.model_format == Frontend::PADDLE) { - std::string contents; - if (!ReadBinaryFromFile(runtime_option.model_file, &contents)) { - return; - } - auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size()); - if (reader.has_nms) { - has_nms_ = true; - background_label = reader.nms_params.background_label; - keep_top_k = reader.nms_params.keep_top_k; - nms_eta = reader.nms_params.nms_eta; - nms_threshold = reader.nms_params.nms_threshold; - score_threshold = reader.nms_params.score_threshold; - nms_top_k = reader.nms_params.nms_top_k; - normalized = reader.nms_params.normalized; - } - } -} - -bool PPYOLOE::Initialize() { -#ifdef ENABLE_PADDLE_FRONTEND - // remove multiclass_nms3 now - // this is a trick operation for ppyoloe while inference on trt - GetNmsInfo(); - runtime_option.remove_multiclass_nms_ = true; - runtime_option.custom_op_info_["multiclass_nms3"] = "MultiClassNMS"; -#endif - if (!BuildPreprocessPipelineFromConfig()) { - FDERROR << "Failed to build preprocess pipeline from configuration file." - << std::endl; - return false; - } - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - - if (has_nms_ && runtime_option.backend == Backend::TRT) { - FDINFO << "Detected operator multiclass_nms3 in your model, will replace " - "it with fastdeploy::backend::MultiClassNMS(background_label=" - << background_label << ", keep_top_k=" << keep_top_k - << ", nms_eta=" << nms_eta << ", nms_threshold=" << nms_threshold - << ", score_threshold=" << score_threshold - << ", nms_top_k=" << nms_top_k << ", normalized=" << normalized - << ")." << std::endl; - has_nms_ = false; - } - return true; -} - -bool PPYOLOE::BuildPreprocessPipelineFromConfig() { - processors_.clear(); - YAML::Node cfg; - try { - cfg = YAML::LoadFile(config_file_); - } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file_ - << ", maybe you should check this file." << std::endl; - return false; - } - - processors_.push_back(std::make_shared()); - - for (const auto& op : cfg["Preprocess"]) { - std::string op_name = op["type"].as(); - if (op_name == "NormalizeImage") { - auto mean = op["mean"].as>(); - auto std = op["std"].as>(); - bool is_scale = op["is_scale"].as(); - processors_.push_back(std::make_shared(mean, std, is_scale)); - } else if (op_name == "Resize") { - bool keep_ratio = op["keep_ratio"].as(); - auto target_size = op["target_size"].as>(); - int interp = op["interp"].as(); - FDASSERT(target_size.size(), - "Require size of target_size be 2, but now it's " + - std::to_string(target_size.size()) + "."); - if (!keep_ratio) { - int width = target_size[1]; - int height = target_size[0]; - processors_.push_back( - std::make_shared(width, height, -1.0, -1.0, interp, false)); - } else { - int min_target_size = std::min(target_size[0], target_size[1]); - int max_target_size = std::max(target_size[0], target_size[1]); - processors_.push_back(std::make_shared( - min_target_size, interp, true, max_target_size)); - } - } else if (op_name == "Permute") { - // Do nothing, do permute as the last operation - continue; - // processors_.push_back(std::make_shared()); - } else if (op_name == "Pad") { - auto size = op["size"].as>(); - auto value = op["fill_value"].as>(); - processors_.push_back(std::make_shared("float")); - processors_.push_back( - std::make_shared(size[1], size[0], value)); - } else if (op_name == "PadStride") { - auto stride = op["stride"].as(); - processors_.push_back( - std::make_shared(stride, std::vector(3, 0))); - } else { - FDERROR << "Unexcepted preprocess operator: " << op_name << "." - << std::endl; - return false; - } - } - processors_.push_back(std::make_shared()); - return true; -} - -bool PPYOLOE::Preprocess(Mat* mat, std::vector* outputs) { - int origin_w = mat->Width(); - int origin_h = mat->Height(); - for (size_t i = 0; i < processors_.size(); ++i) { - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - } - - outputs->resize(2); - (*outputs)[0].name = InputInfoOfRuntime(0).name; - mat->ShareWithTensor(&((*outputs)[0])); - - // reshape to [1, c, h, w] - (*outputs)[0].shape.insert((*outputs)[0].shape.begin(), 1); - - (*outputs)[1].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(1).name); - float* ptr = static_cast((*outputs)[1].MutableData()); - ptr[0] = mat->Height() * 1.0 / origin_h; - ptr[1] = mat->Width() * 1.0 / origin_w; - return true; -} - -bool PPYOLOE::Postprocess(std::vector& infer_result, - DetectionResult* result) { - FDASSERT(infer_result[1].shape[0] == 1, - "Only support batch = 1 in FastDeploy now."); - - if (!has_nms_) { - int boxes_index = 0; - int scores_index = 1; - if (infer_result[0].shape[1] == infer_result[1].shape[2]) { - boxes_index = 0; - scores_index = 1; - } else if (infer_result[0].shape[2] == infer_result[1].shape[1]) { - boxes_index = 1; - scores_index = 0; - } else { - FDERROR << "The shape of boxes and scores should be [batch, boxes_num, " - "4], [batch, classes_num, boxes_num]" - << std::endl; - return false; - } - - backend::MultiClassNMS nms; - nms.background_label = background_label; - nms.keep_top_k = keep_top_k; - nms.nms_eta = nms_eta; - nms.nms_threshold = nms_threshold; - nms.score_threshold = score_threshold; - nms.nms_top_k = nms_top_k; - nms.normalized = normalized; - nms.Compute(static_cast(infer_result[boxes_index].Data()), - static_cast(infer_result[scores_index].Data()), - infer_result[boxes_index].shape, - infer_result[scores_index].shape); - if (nms.out_num_rois_data[0] > 0) { - result->Reserve(nms.out_num_rois_data[0]); - } - for (size_t i = 0; i < nms.out_num_rois_data[0]; ++i) { - result->label_ids.push_back(nms.out_box_data[i * 6]); - result->scores.push_back(nms.out_box_data[i * 6 + 1]); - result->boxes.emplace_back(std::array{ - nms.out_box_data[i * 6 + 2], nms.out_box_data[i * 6 + 3], - nms.out_box_data[i * 6 + 4], nms.out_box_data[i * 6 + 5]}); - } - } else { - int box_num = 0; - if (infer_result[1].dtype == FDDataType::INT32) { - box_num = *(static_cast(infer_result[1].Data())); - } else if (infer_result[1].dtype == FDDataType::INT64) { - box_num = *(static_cast(infer_result[1].Data())); - } else { - FDASSERT( - false, - "The output box_num of PPYOLOE model should be type of int32/int64."); - } - result->Reserve(box_num); - float* box_data = static_cast(infer_result[0].Data()); - for (size_t i = 0; i < box_num; ++i) { - result->label_ids.push_back(box_data[i * 6]); - result->scores.push_back(box_data[i * 6 + 1]); - result->boxes.emplace_back( - std::array{box_data[i * 6 + 2], box_data[i * 6 + 3], - box_data[i * 6 + 4], box_data[i * 6 + 5]}); - } - } - return true; -} - -bool PPYOLOE::Predict(cv::Mat* im, DetectionResult* result) { - Mat mat(*im); - std::vector processed_data; - if (!Preprocess(&mat, &processed_data)) { - FDERROR << "Failed to preprocess input data while using model:" - << ModelName() << "." << std::endl; - return false; - } - - float* tmp = static_cast(processed_data[1].Data()); - std::vector infer_result; - if (!Infer(processed_data, &infer_result)) { - FDERROR << "Failed to inference while using model:" << ModelName() << "." - << std::endl; - return false; - } - - if (!Postprocess(infer_result, result)) { - FDERROR << "Failed to postprocess while using model:" << ModelName() << "." - << std::endl; - return false; - } - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h b/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h deleted file mode 100644 index 2d8cca99f..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/ppyoloe.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL PPYOLOE : public FastDeployModel { - public: - PPYOLOE(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - virtual std::string ModelName() const { return "PaddleDetection/PPYOLOE"; } - - virtual bool Initialize(); - - virtual bool BuildPreprocessPipelineFromConfig(); - - virtual bool Preprocess(Mat* mat, std::vector* outputs); - - virtual bool Postprocess(std::vector& infer_result, - DetectionResult* result); - - virtual bool Predict(cv::Mat* im, DetectionResult* result); - - protected: - PPYOLOE() {} - - std::vector> processors_; - std::string config_file_; - // configuration for nms - int64_t background_label = -1; - int64_t keep_top_k = 300; - float nms_eta = 1.0; - float nms_threshold = 0.7; - float score_threshold = 0.01; - int64_t nms_top_k = 10000; - bool normalized = true; - bool has_nms_ = false; - - // This function will used to check if this model contains multiclass_nms - // and get parameters from the operator - void GetNmsInfo(); -}; - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc b/csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc deleted file mode 100644 index 38ecc3d1c..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.cc +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/ppdet/rcnn.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -FasterRCNN::FasterRCNN(const std::string& model_file, - const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER}; - valid_gpu_backends = {Backend::PDINFER}; - has_nms_ = true; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool FasterRCNN::Initialize() { - if (!BuildPreprocessPipelineFromConfig()) { - FDERROR << "Failed to build preprocess pipeline from configuration file." - << std::endl; - return false; - } - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - return true; -} - -bool FasterRCNN::Preprocess(Mat* mat, std::vector* outputs) { - int origin_w = mat->Width(); - int origin_h = mat->Height(); - float scale[2] = {1.0, 1.0}; - for (size_t i = 0; i < processors_.size(); ++i) { - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - if (processors_[i]->Name().find("Resize") != std::string::npos) { - scale[0] = mat->Height() * 1.0 / origin_h; - scale[1] = mat->Width() * 1.0 / origin_w; - } - } - - outputs->resize(3); - (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, "im_shape"); - (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, "scale_factor"); - float* ptr0 = static_cast((*outputs)[0].MutableData()); - ptr0[0] = mat->Height(); - ptr0[1] = mat->Width(); - float* ptr2 = static_cast((*outputs)[2].MutableData()); - ptr2[0] = scale[0]; - ptr2[1] = scale[1]; - (*outputs)[1].name = "image"; - mat->ShareWithTensor(&((*outputs)[1])); - // reshape to [1, c, h, w] - (*outputs)[1].shape.insert((*outputs)[1].shape.begin(), 1); - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.h b/csrcs/fastdeploy/vision/detection/ppdet/rcnn.h deleted file mode 100644 index d44ca852e..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/rcnn.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL FasterRCNN : public PPYOLOE { - public: - FasterRCNN(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - virtual std::string ModelName() const { return "PaddleDetection/FasterRCNN"; } - - virtual bool Preprocess(Mat* mat, std::vector* outputs); - virtual bool Initialize(); - - protected: - FasterRCNN() {} -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc b/csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc deleted file mode 100644 index 309d65640..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/ppdet/yolov3.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -YOLOv3::YOLOv3(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER}; - valid_gpu_backends = {Backend::PDINFER}; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOv3::Preprocess(Mat* mat, std::vector* outputs) { - int origin_w = mat->Width(); - int origin_h = mat->Height(); - for (size_t i = 0; i < processors_.size(); ++i) { - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - } - - outputs->resize(3); - (*outputs)[0].Allocate({1, 2}, FDDataType::FP32, "im_shape"); - (*outputs)[2].Allocate({1, 2}, FDDataType::FP32, "scale_factor"); - float* ptr0 = static_cast((*outputs)[0].MutableData()); - ptr0[0] = mat->Height(); - ptr0[1] = mat->Width(); - float* ptr2 = static_cast((*outputs)[2].MutableData()); - ptr2[0] = mat->Height() * 1.0 / origin_h; - ptr2[1] = mat->Width() * 1.0 / origin_w; - (*outputs)[1].name = "image"; - mat->ShareWithTensor(&((*outputs)[1])); - // reshape to [1, c, h, w] - (*outputs)[1].shape.insert((*outputs)[1].shape.begin(), 1); - return true; -} - -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.h b/csrcs/fastdeploy/vision/detection/ppdet/yolov3.h deleted file mode 100644 index 1b65bfca1..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/yolov3.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL YOLOv3 : public PPYOLOE { - public: - YOLOv3(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - virtual std::string ModelName() const { return "PaddleDetection/YOLOv3"; } - - virtual bool Preprocess(Mat* mat, std::vector* outputs); -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolox.cc b/csrcs/fastdeploy/vision/detection/ppdet/yolox.cc deleted file mode 100644 index a60ebfcc4..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/yolox.cc +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/detection/ppdet/yolox.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -PaddleYOLOX::PaddleYOLOX(const std::string& model_file, const std::string& params_file, - const std::string& config_file, const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT}; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - background_label = -1; - keep_top_k = 1000; - nms_eta = 1; - nms_threshold = 0.65; - nms_top_k = 10000; - normalized = true; - score_threshold = 0.001; - initialized = Initialize(); -} - -bool PaddleYOLOX::Preprocess(Mat* mat, std::vector* outputs) { - int origin_w = mat->Width(); - int origin_h = mat->Height(); - float scale[2] = {1.0, 1.0}; - for (size_t i = 0; i < processors_.size(); ++i) { - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - if (processors_[i]->Name().find("Resize") != std::string::npos) { - scale[0] = mat->Height() * 1.0 / origin_h; - scale[1] = mat->Width() * 1.0 / origin_w; - } - } - - outputs->resize(2); - (*outputs)[0].name = InputInfoOfRuntime(0).name; - mat->ShareWithTensor(&((*outputs)[0])); - - // reshape to [1, c, h, w] - (*outputs)[0].shape.insert((*outputs)[0].shape.begin(), 1); - - (*outputs)[1].Allocate({1, 2}, FDDataType::FP32, InputInfoOfRuntime(1).name); - float* ptr = static_cast((*outputs)[1].MutableData()); - ptr[0] = scale[0]; - ptr[1] = scale[1]; - return true; -} -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/detection/ppdet/yolox.h b/csrcs/fastdeploy/vision/detection/ppdet/yolox.h deleted file mode 100644 index 4ffe2f39c..000000000 --- a/csrcs/fastdeploy/vision/detection/ppdet/yolox.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/vision/detection/ppdet/ppyoloe.h" - -namespace fastdeploy { -namespace vision { -namespace detection { - -class FASTDEPLOY_DECL PaddleYOLOX : public PPYOLOE { - public: - PaddleYOLOX(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - virtual bool Preprocess(Mat* mat, std::vector* outputs); - - virtual std::string ModelName() const { return "PaddleDetection/YOLOX"; } -}; -} // namespace detection -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc b/csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc deleted file mode 100644 index ebb52010e..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.cc +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/facedet/contrib/retinaface.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -struct RetinaAnchor { - float cx; - float cy; - float s_kx; - float s_ky; -}; - -void GenerateRetinaAnchors(const std::vector& size, - const std::vector& downsample_strides, - const std::vector>& min_sizes, - std::vector* anchors) { - // size: tuple of input (width, height) - // downsample_strides: downsample strides (steps), e.g (8,16,32) - // min_sizes: width and height for each anchor, - // e.g {{16, 32}, {64, 128}, {256, 512}} - int h = size[1]; - int w = size[0]; - std::vector> feature_maps; - for (auto s : downsample_strides) { - feature_maps.push_back( - {static_cast( - std::ceil(static_cast(h) / static_cast(s))), - static_cast( - std::ceil(static_cast(w) / static_cast(s)))}); - } - - (*anchors).clear(); - const size_t num_feature_map = feature_maps.size(); - // reference: layers/functions/prior_box.py#L21 - for (size_t k = 0; k < num_feature_map; ++k) { - auto f_map = feature_maps.at(k); // e.g [640//8,640//8] - auto tmp_min_sizes = min_sizes.at(k); // e.g [8,16] - int f_h = f_map.at(0); - int f_w = f_map.at(1); - for (size_t i = 0; i < f_h; ++i) { - for (size_t j = 0; j < f_w; ++j) { - for (auto min_size : tmp_min_sizes) { - float s_kx = - static_cast(min_size) / static_cast(w); // e.g 16/w - float s_ky = - static_cast(min_size) / static_cast(h); // e.g 16/h - // (x + 0.5) * step / w normalized loc mapping to input width - // (y + 0.5) * step / h normalized loc mapping to input height - float s = static_cast(downsample_strides.at(k)); - float cx = (static_cast(j) + 0.5f) * s / static_cast(w); - float cy = (static_cast(i) + 0.5f) * s / static_cast(h); - (*anchors).emplace_back( - RetinaAnchor{cx, cy, s_kx, s_ky}); // without clip - } - } - } - } -} - -RetinaFace::RetinaFace(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool RetinaFace::Initialize() { - // parameters for preprocess - size = {640, 640}; - variance = {0.1f, 0.2f}; - downsample_strides = {8, 16, 32}; - min_sizes = {{16, 32}, {64, 128}, {256, 512}}; - landmarks_per_face = 5; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - return true; -} - -bool RetinaFace::Preprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { - // retinaface's preprocess steps - // 1. Resize - // 2. Convert(opencv style) or Normalize - // 3. HWC->CHW - int resize_w = size[0]; - int resize_h = size[1]; - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - - // Compute `result = mat * alpha + beta` directly by channel - // Reference: detect.py#L94 - std::vector alpha = {1.f, 1.f, 1.f}; - std::vector beta = {-104.f, -117.f, -123.f}; // BGR; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool RetinaFace::Postprocess( - std::vector& infer_result, FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - // retinaface has 3 output tensors, boxes & conf & landmarks - FDASSERT( - (infer_result.size() == 3), - "The default number of output tensor must be 3 according to retinaface."); - FDTensor& boxes_tensor = infer_result.at(0); // (1,n,4) - FDTensor& conf_tensor = infer_result.at(1); // (1,n,2) - FDTensor& landmarks_tensor = infer_result.at(2); // (1,n,10) - FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now."); - if (boxes_tensor.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - - result->Clear(); - // must be setup landmarks_per_face before reserve - result->landmarks_per_face = landmarks_per_face; - result->Reserve(boxes_tensor.shape[1]); - - float* boxes_ptr = static_cast(boxes_tensor.Data()); - float* conf_ptr = static_cast(conf_tensor.Data()); - float* landmarks_ptr = static_cast(landmarks_tensor.Data()); - const size_t num_bboxes = boxes_tensor.shape[1]; // n - // fetch original image shape - auto iter_ipt = im_info.find("input_shape"); - FDASSERT((iter_ipt != im_info.end()), - "Cannot find input_shape from im_info."); - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - - // generate anchors with dowmsample strides - std::vector anchors; - GenerateRetinaAnchors(size, downsample_strides, min_sizes, &anchors); - - // decode bounding boxes - for (size_t i = 0; i < num_bboxes; ++i) { - float confidence = conf_ptr[2 * i + 1]; - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - float prior_cx = anchors.at(i).cx; - float prior_cy = anchors.at(i).cy; - float prior_s_kx = anchors.at(i).s_kx; - float prior_s_ky = anchors.at(i).s_ky; - - // fetch offsets (dx,dy,dw,dh) - float dx = boxes_ptr[4 * i + 0]; - float dy = boxes_ptr[4 * i + 1]; - float dw = boxes_ptr[4 * i + 2]; - float dh = boxes_ptr[4 * i + 3]; - // reference: Pytorch_Retinaface/utils/box_utils.py - float x = prior_cx + dx * variance[0] * prior_s_kx; - float y = prior_cy + dy * variance[0] * prior_s_ky; - float w = prior_s_kx * std::exp(dw * variance[1]); - float h = prior_s_ky * std::exp(dh * variance[1]); // (0.~1.) - // from (x,y,w,h) to (x1,y1,x2,y2) - float x1 = (x - w / 2.f) * ipt_w; - float y1 = (y - h / 2.f) * ipt_h; - float x2 = (x + w / 2.f) * ipt_w; - float y2 = (y + h / 2.f) * ipt_h; - result->boxes.emplace_back(std::array{x1, y1, x2, y2}); - result->scores.push_back(confidence); - // decode landmarks (default 5 landmarks) - if (landmarks_per_face > 0) { - // reference: utils/box_utils.py#L241 - for (size_t j = 0; j < landmarks_per_face * 2; j += 2) { - float ldx = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 0)]; - float ldy = landmarks_ptr[i * (landmarks_per_face * 2) + (j + 1)]; - float lx = (prior_cx + ldx * variance[0] * prior_s_kx) * ipt_w; - float ly = (prior_cy + ldy * variance[0] * prior_s_ky) * ipt_h; - result->landmarks.emplace_back(std::array{lx, ly}); - } - } - } - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale and clip box - for (size_t i = 0; i < result->boxes.size(); ++i) { - result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); - result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); - result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); - result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - // scale and clip landmarks - for (size_t i = 0; i < result->landmarks.size(); ++i) { - result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f); - result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f); - result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); - result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); - } - return true; -} - -bool RetinaFace::Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold, float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors, result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.h b/csrcs/fastdeploy/vision/facedet/contrib/retinaface.h deleted file mode 100644 index e1ef50e2e..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/retinaface.h +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -class FASTDEPLOY_DECL RetinaFace : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - RetinaFace(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "Pytorch_Retinaface"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold = 0.25f, - float nms_iou_threshold = 0.4f); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height), default (640, 640) - std::vector size; - // variance in RetinaFace's prior-box(anchor) generate process, - // default (0.1, 0.2) - std::vector variance; - // downsample strides (namely, steps) for RetinaFace to - // generate anchors, will take (8,16,32) as default values. - std::vector downsample_strides; - // min sizes, width and height for each anchor. - std::vector> min_sizes; - // landmarks_per_face, default 5 in RetinaFace - int landmarks_per_face; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(std::vector& infer_result, - FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - bool is_dynamic_input_; -}; - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc deleted file mode 100644 index 9419327c4..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/retinaface_pybind.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindRetinaFace(pybind11::module& m) { - pybind11::class_(m, - "RetinaFace") - .def(pybind11::init()) - .def("predict", - [](vision::facedet::RetinaFace& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::FaceDetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::facedet::RetinaFace::size) - .def_readwrite("variance", &vision::facedet::RetinaFace::variance) - .def_readwrite("downsample_strides", - &vision::facedet::RetinaFace::downsample_strides) - .def_readwrite("min_sizes", &vision::facedet::RetinaFace::min_sizes) - .def_readwrite("landmarks_per_face", - &vision::facedet::RetinaFace::landmarks_per_face); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc b/csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc deleted file mode 100644 index ffcff65c9..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.cc +++ /dev/null @@ -1,369 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/facedet/contrib/scrfd.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -void SCRFD::LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill, bool scale_up, int stride) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -SCRFD::SCRFD(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool SCRFD::Initialize() { - // parameters for preprocess - use_kps = true; - size = {640, 640}; - padding_value = {0.0, 0.0, 0.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - downsample_strides = {8, 16, 32}; - num_anchors = 2; - landmarks_per_face = 5; - center_points_is_update_ = false; - max_nms = 30000; - // num_outputs = use_kps ? 9 : 6; - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_shape is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - - return true; -} - -bool SCRFD::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(mat->Height() * ratio); - int resize_w = int(mat->Width() * ratio); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // scrfd's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - SCRFD::LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, - is_scale_up, stride); - - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - // Original Repo/tools/scrfd.py: cv2.dnn.blobFromImage(img, 1.0/128, - // input_size, (127.5, 127.5, 127.5), swapRB=True) - std::vector alpha = {1.f / 128.f, 1.f / 128.f, 1.f / 128.f}; - std::vector beta = {-127.5f / 128.f, -127.5f / 128.f, -127.5f / 128.f}; - Convert::Run(mat, alpha, beta); - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -void SCRFD::GeneratePoints() { - if (center_points_is_update_ && !is_dynamic_input_) { - return; - } - // 8, 16, 32 - for (auto local_stride : downsample_strides) { - unsigned int num_grid_w = size[0] / local_stride; - unsigned int num_grid_h = size[1] / local_stride; - // y - for (unsigned int i = 0; i < num_grid_h; ++i) { - // x - for (unsigned int j = 0; j < num_grid_w; ++j) { - // num_anchors, col major - for (unsigned int k = 0; k < num_anchors; ++k) { - SCRFDPoint point; - point.cx = static_cast(j); - point.cy = static_cast(i); - center_points_[local_stride].push_back(point); - } - } - } - } - - center_points_is_update_ = true; -} - -bool SCRFD::Postprocess( - std::vector& infer_result, FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - // number of downsample_strides - int fmc = downsample_strides.size(); - // scrfd has 6,9,10,15 output tensors - FDASSERT((infer_result.size() == 9 || infer_result.size() == 6 || - infer_result.size() == 10 || infer_result.size() == 15), - "The default number of output tensor must be 6, 9, 10, or 15 " - "according to scrfd."); - FDASSERT((fmc == 3 || fmc == 5), "The fmc must be 3 or 5"); - FDASSERT((infer_result.at(0).shape[0] == 1), "Only support batch =1 now."); - for (int i = 0; i < fmc; ++i) { - if (infer_result.at(i).dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - } - int total_num_boxes = 0; - // compute the reserve space. - for (int f = 0; f < fmc; ++f) { - total_num_boxes += infer_result.at(f).shape[1]; - }; - GeneratePoints(); - result->Clear(); - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.0f; - float pad_w = (out_w - ipt_w * scale) / 2.0f; - if (is_mini_pad) { - // 和 LetterBox中_auto=true的处理逻辑对应 - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - // must be setup landmarks_per_face before reserve - result->landmarks_per_face = landmarks_per_face; - result->Reserve(total_num_boxes); - unsigned int count = 0; - // loop each stride - for (int f = 0; f < fmc; ++f) { - float* score_ptr = static_cast(infer_result.at(f).Data()); - float* bbox_ptr = static_cast(infer_result.at(f + fmc).Data()); - const unsigned int num_points = infer_result.at(f).shape[1]; - int current_stride = downsample_strides[f]; - auto& stride_points = center_points_[current_stride]; - // loop each anchor - for (unsigned int i = 0; i < num_points; ++i) { - const float cls_conf = score_ptr[i]; - if (cls_conf < conf_threshold) continue; // filter - auto& point = stride_points.at(i); - const float cx = point.cx; // cx - const float cy = point.cy; // cy - // bbox - const float* offsets = bbox_ptr + i * 4; - float l = offsets[0]; // left - float t = offsets[1]; // top - float r = offsets[2]; // right - float b = offsets[3]; // bottom - - float x1 = ((cx - l) * static_cast(current_stride) - - static_cast(pad_w)) / - scale; // cx - l x1 - float y1 = ((cy - t) * static_cast(current_stride) - - static_cast(pad_h)) / - scale; // cy - t y1 - float x2 = ((cx + r) * static_cast(current_stride) - - static_cast(pad_w)) / - scale; // cx + r x2 - float y2 = ((cy + b) * static_cast(current_stride) - - static_cast(pad_h)) / - scale; // cy + b y2 - result->boxes.emplace_back(std::array{x1, y1, x2, y2}); - result->scores.push_back(cls_conf); - if (use_kps) { - float* landmarks_ptr = - static_cast(infer_result.at(f + 2 * fmc).Data()); - // landmarks - const float* kps_offsets = landmarks_ptr + i * (landmarks_per_face * 2); - for (unsigned int j = 0; j < landmarks_per_face * 2; j += 2) { - float kps_l = kps_offsets[j]; - float kps_t = kps_offsets[j + 1]; - float kps_x = ((cx + kps_l) * static_cast(current_stride) - - static_cast(pad_w)) / - scale; // cx + l x - float kps_y = ((cy + kps_t) * static_cast(current_stride) - - static_cast(pad_h)) / - scale; // cy + t y - result->landmarks.emplace_back(std::array{kps_x, kps_y}); - } - } - count += 1; // limit boxes for nms. - if (count > max_nms) { - break; - } - } - } - - // fetch original image shape - FDASSERT((iter_ipt != im_info.end()), - "Cannot find input_shape from im_info."); - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale and clip box - for (size_t i = 0; i < result->boxes.size(); ++i) { - result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); - result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); - result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); - result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - // scale and clip landmarks - for (size_t i = 0; i < result->landmarks.size(); ++i) { - result->landmarks[i][0] = std::max(result->landmarks[i][0], 0.0f); - result->landmarks[i][1] = std::max(result->landmarks[i][1], 0.0f); - result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); - result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); - } - return true; -} - -bool SCRFD::Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold, float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors, result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.h b/csrcs/fastdeploy/vision/facedet/contrib/scrfd.h deleted file mode 100644 index 398301363..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/scrfd.h +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -class FASTDEPLOY_DECL SCRFD : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - SCRFD(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "scrfd"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold = 0.25f, - float nms_iou_threshold = 0.4f); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height), default (640, 640) - std::vector size; - // downsample strides (namely, steps) for SCRFD to - // generate anchors, will take (8,16,32) as default values. - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // for offseting the boxes by classes when using NMS - std::vector downsample_strides; - // landmarks_per_face, default 5 in SCRFD - int landmarks_per_face; - // are the outputs of onnx file with key points features or not - bool use_kps; - // the upperbond number of boxes processed by nms. - int max_nms; - // number anchors of each stride - unsigned int num_anchors; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(std::vector& infer_result, - FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - void GeneratePoints(); - - // 对图片进行LetterBox处理 - // mat 为读取到的原图 - // size 为输入模型的图像尺寸 - void LetterBox(Mat* mat, const std::vector& size, - const std::vector& color, bool _auto, - bool scale_fill = false, bool scale_up = true, - int stride = 32); - - bool is_dynamic_input_; - - bool center_points_is_update_; - - typedef struct { - float cx; - float cy; - } SCRFDPoint; - - std::unordered_map> center_points_; -}; -} // namespace facedet -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc deleted file mode 100644 index 7cfa4d025..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/scrfd_pybind.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindSCRFD(pybind11::module& m) { - // Bind SCRFD - pybind11::class_(m, "SCRFD") - .def(pybind11::init()) - .def("predict", - [](vision::facedet::SCRFD& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::FaceDetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::facedet::SCRFD::size) - .def_readwrite("padding_value", &vision::facedet::SCRFD::padding_value) - .def_readwrite("is_mini_pad", &vision::facedet::SCRFD::is_mini_pad) - .def_readwrite("is_no_pad", &vision::facedet::SCRFD::is_no_pad) - .def_readwrite("is_scale_up", &vision::facedet::SCRFD::is_scale_up) - .def_readwrite("stride", &vision::facedet::SCRFD::stride) - .def_readwrite("use_kps", &vision::facedet::SCRFD::use_kps) - .def_readwrite("max_nms", &vision::facedet::SCRFD::max_nms) - .def_readwrite("downsample_strides", - &vision::facedet::SCRFD::downsample_strides) - .def_readwrite("num_anchors", &vision::facedet::SCRFD::num_anchors) - .def_readwrite("landmarks_per_face", - &vision::facedet::SCRFD::landmarks_per_face); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc b/csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc deleted file mode 100644 index ed4962306..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.cc +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/facedet/contrib/ultraface.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -UltraFace::UltraFace(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool UltraFace::Initialize() { - // parameters for preprocess - size = {320, 240}; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - return true; -} - -bool UltraFace::Preprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { - // ultraface's preprocess steps - // 1. resize - // 2. BGR->RGB - // 3. HWC->CHW - int resize_w = size[0]; - int resize_h = size[1]; - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - - BGR2RGB::Run(mat); - // Compute `result = mat * alpha + beta` directly by channel - // Reference: detect_imgs_onnx.py#L73 - std::vector alpha = {1.0f / 128.0f, 1.0f / 128.0f, 1.0f / 128.0f}; - std::vector beta = {-127.0f * (1.0f / 128.0f), - -127.0f * (1.0f / 128.0f), - -127.0f * (1.0f / 128.0f)}; // RGB; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool UltraFace::Postprocess( - std::vector& infer_result, FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - // ultraface has 2 output tensors, scores & boxes - FDASSERT( - (infer_result.size() == 2), - "The default number of output tensor must be 2 according to ultraface."); - FDTensor& scores_tensor = infer_result.at(0); // (1,4420,2) - FDTensor& boxes_tensor = infer_result.at(1); // (1,4420,4) - FDASSERT((scores_tensor.shape[0] == 1), "Only support batch =1 now."); - FDASSERT((boxes_tensor.shape[0] == 1), "Only support batch =1 now."); - if (scores_tensor.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - if (boxes_tensor.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - - result->Clear(); - // must be setup landmarks_per_face before reserve. - // ultraface detector does not detect landmarks by default. - result->landmarks_per_face = 0; - result->Reserve(boxes_tensor.shape[1]); - - float* scores_ptr = static_cast(scores_tensor.Data()); - float* boxes_ptr = static_cast(boxes_tensor.Data()); - const size_t num_bboxes = boxes_tensor.shape[1]; // e.g 4420 - // fetch original image shape - auto iter_ipt = im_info.find("input_shape"); - FDASSERT((iter_ipt != im_info.end()), - "Cannot find input_shape from im_info."); - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - - // decode bounding boxes - for (size_t i = 0; i < num_bboxes; ++i) { - float confidence = scores_ptr[2 * i + 1]; - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - float x1 = boxes_ptr[4 * i + 0] * ipt_w; - float y1 = boxes_ptr[4 * i + 1] * ipt_h; - float x2 = boxes_ptr[4 * i + 2] * ipt_w; - float y2 = boxes_ptr[4 * i + 3] * ipt_h; - result->boxes.emplace_back(std::array{x1, y1, x2, y2}); - result->scores.push_back(confidence); - } - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale and clip box - for (size_t i = 0; i < result->boxes.size(); ++i) { - result->boxes[i][0] = std::max(result->boxes[i][0], 0.0f); - result->boxes[i][1] = std::max(result->boxes[i][1], 0.0f); - result->boxes[i][2] = std::max(result->boxes[i][2], 0.0f); - result->boxes[i][3] = std::max(result->boxes[i][3], 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - return true; -} - -bool UltraFace::Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold, float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors, result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.h b/csrcs/fastdeploy/vision/facedet/contrib/ultraface.h deleted file mode 100644 index 387bc1f9a..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/ultraface.h +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -class FASTDEPLOY_DECL UltraFace : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - UltraFace(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { - return "Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB"; - } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold = 0.7f, - float nms_iou_threshold = 0.3f); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height), default (320, 240) - std::vector size; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* outputs, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(std::vector& infer_result, - FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - bool is_dynamic_input_; -}; - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc deleted file mode 100644 index 855c26908..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/ultraface_pybind.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindUltraFace(pybind11::module& m) { - pybind11::class_(m, "UltraFace") - .def(pybind11::init()) - .def("predict", - [](vision::facedet::UltraFace& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::FaceDetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::facedet::UltraFace::size); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc b/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc deleted file mode 100644 index 96af230b0..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.cc +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/facedet/contrib/yolov5face.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -void LetterBox(Mat* mat, std::vector size, std::vector color, - bool _auto, bool scale_fill = false, bool scale_up = true, - int stride = 32) { - float scale = - std::min(size[1] * 1.0 / mat->Height(), size[0] * 1.0 / mat->Width()); - if (!scale_up) { - scale = std::min(scale, 1.0f); - } - - int resize_h = int(round(mat->Height() * scale)); - int resize_w = int(round(mat->Width() * scale)); - - int pad_w = size[0] - resize_w; - int pad_h = size[1] - resize_h; - if (_auto) { - pad_h = pad_h % stride; - pad_w = pad_w % stride; - } else if (scale_fill) { - pad_h = 0; - pad_w = 0; - resize_h = size[1]; - resize_w = size[0]; - } - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (pad_h > 0 || pad_w > 0) { - float half_h = pad_h * 1.0 / 2; - int top = int(round(half_h - 0.1)); - int bottom = int(round(half_h + 0.1)); - float half_w = pad_w * 1.0 / 2; - int left = int(round(half_w - 0.1)); - int right = int(round(half_w + 0.1)); - Pad::Run(mat, top, bottom, left, right, color); - } -} - -YOLOv5Face::YOLOv5Face(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool YOLOv5Face::Initialize() { - // parameters for preprocess - size = {640, 640}; - padding_value = {114.0, 114.0, 114.0}; - is_mini_pad = false; - is_no_pad = false; - is_scale_up = false; - stride = 32; - landmarks_per_face = 5; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // Check if the input shape is dynamic after Runtime already initialized, - // Note that, We need to force is_mini_pad 'false' to keep static - // shape after padding (LetterBox) when the is_dynamic_input_ is 'false'. - is_dynamic_input_ = false; - auto shape = InputInfoOfRuntime(0).shape; - for (int i = 0; i < shape.size(); ++i) { - // if height or width is dynamic - if (i >= 2 && shape[i] <= 0) { - is_dynamic_input_ = true; - break; - } - } - if (!is_dynamic_input_) { - is_mini_pad = false; - } - return true; -} - -bool YOLOv5Face::Preprocess( - Mat* mat, FDTensor* output, - std::map>* im_info) { - // process after image load - float ratio = std::min(size[1] * 1.0f / static_cast(mat->Height()), - size[0] * 1.0f / static_cast(mat->Width())); - if (ratio != 1.0) { // always true - int interp = cv::INTER_AREA; - if (ratio > 1.0) { - interp = cv::INTER_LINEAR; - } - int resize_h = int(round(static_cast(mat->Height()) * ratio)); - int resize_w = int(round(static_cast(mat->Width()) * ratio)); - Resize::Run(mat, resize_w, resize_h, -1, -1, interp); - } - // yolov5face's preprocess steps - // 1. letterbox - // 2. BGR->RGB - // 3. HWC->CHW - LetterBox(mat, size, padding_value, is_mini_pad, is_no_pad, is_scale_up, - stride); - BGR2RGB::Run(mat); - // Normalize::Run(mat, std::vector(mat->Channels(), 0.0), - // std::vector(mat->Channels(), 1.0)); - // Compute `result = mat * alpha + beta` directly by channel - std::vector alpha = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f}; - std::vector beta = {0.0f, 0.0f, 0.0f}; - Convert::Run(mat, alpha, beta); - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool YOLOv5Face::Postprocess( - FDTensor& infer_result, FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold) { - // infer_result: (1,n,16) 16=4+1+10+1 - FDASSERT(infer_result.shape[0] == 1, "Only support batch =1 now."); - if (infer_result.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - - result->Clear(); - // must be setup landmarks_per_face before reserve - result->landmarks_per_face = landmarks_per_face; - result->Reserve(infer_result.shape[1]); - - float* data = static_cast(infer_result.Data()); - for (size_t i = 0; i < infer_result.shape[1]; ++i) { - float* reg_cls_ptr = data + (i * infer_result.shape[2]); - float obj_conf = reg_cls_ptr[4]; - float cls_conf = reg_cls_ptr[15]; - float confidence = obj_conf * cls_conf; - // filter boxes by conf_threshold - if (confidence <= conf_threshold) { - continue; - } - float x = reg_cls_ptr[0]; - float y = reg_cls_ptr[1]; - float w = reg_cls_ptr[2]; - float h = reg_cls_ptr[3]; - - // convert from [x, y, w, h] to [x1, y1, x2, y2] - result->boxes.emplace_back(std::array{ - (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)}); - result->scores.push_back(confidence); - // decode landmarks (default 5 landmarks) - if (landmarks_per_face > 0) { - float* landmarks_ptr = reg_cls_ptr + 5; - for (size_t j = 0; j < landmarks_per_face * 2; j += 2) { - result->landmarks.emplace_back( - std::array{landmarks_ptr[j], landmarks_ptr[j + 1]}); - } - } - } - - if (result->boxes.size() == 0) { - return true; - } - - utils::NMS(result, nms_iou_threshold); - - // scale the boxes to the origin image shape - auto iter_out = im_info.find("output_shape"); - auto iter_ipt = im_info.find("input_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - float out_h = iter_out->second[0]; - float out_w = iter_out->second[1]; - float ipt_h = iter_ipt->second[0]; - float ipt_w = iter_ipt->second[1]; - float scale = std::min(out_h / ipt_h, out_w / ipt_w); - float pad_h = (out_h - ipt_h * scale) / 2.f; - float pad_w = (out_w - ipt_w * scale) / 2.f; - if (is_mini_pad) { - pad_h = static_cast(static_cast(pad_h) % stride); - pad_w = static_cast(static_cast(pad_w) % stride); - } - // scale and clip box - for (size_t i = 0; i < result->boxes.size(); ++i) { - result->boxes[i][0] = std::max((result->boxes[i][0] - pad_w) / scale, 0.0f); - result->boxes[i][1] = std::max((result->boxes[i][1] - pad_h) / scale, 0.0f); - result->boxes[i][2] = std::max((result->boxes[i][2] - pad_w) / scale, 0.0f); - result->boxes[i][3] = std::max((result->boxes[i][3] - pad_h) / scale, 0.0f); - result->boxes[i][0] = std::min(result->boxes[i][0], ipt_w - 1.0f); - result->boxes[i][1] = std::min(result->boxes[i][1], ipt_h - 1.0f); - result->boxes[i][2] = std::min(result->boxes[i][2], ipt_w - 1.0f); - result->boxes[i][3] = std::min(result->boxes[i][3], ipt_h - 1.0f); - } - // scale and clip landmarks - for (size_t i = 0; i < result->landmarks.size(); ++i) { - result->landmarks[i][0] = - std::max((result->landmarks[i][0] - pad_w) / scale, 0.0f); - result->landmarks[i][1] = - std::max((result->landmarks[i][1] - pad_h) / scale, 0.0f); - result->landmarks[i][0] = std::min(result->landmarks[i][0], ipt_w - 1.0f); - result->landmarks[i][1] = std::min(result->landmarks[i][1], ipt_h - 1.0f); - } - return true; -} - -bool YOLOv5Face::Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold, float nms_iou_threshold) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors[0], result, im_info, conf_threshold, - nms_iou_threshold)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h b/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h deleted file mode 100644 index 017c9681a..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face.h +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace facedet { - -class FASTDEPLOY_DECL YOLOv5Face : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - YOLOv5Face(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "yolov5-face"; } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - // conf_threshold 为后处理的参数 - // nms_iou_threshold 为后处理的参数 - virtual bool Predict(cv::Mat* im, FaceDetectionResult* result, - float conf_threshold = 0.25, - float nms_iou_threshold = 0.5); - - // 以下为模型在预测时的一些参数,基本是前后处理所需 - // 用户在创建模型后,可根据模型的要求,以及自己的需求 - // 对参数进行修改 - // tuple of (width, height) - std::vector size; - // padding value, size should be same with Channels - std::vector padding_value; - // only pad to the minimum rectange which height and width is times of stride - bool is_mini_pad; - // while is_mini_pad = false and is_no_pad = true, will resize the image to - // the set size - bool is_no_pad; - // if is_scale_up is false, the input image only can be zoom out, the maximum - // resize scale cannot exceed 1.0 - bool is_scale_up; - // padding stride, for is_mini_pad - int stride; - // setup the number of landmarks for per face (if have), default 5 in - // official yolov5face note that, the outupt tensor's shape must be: - // (1,n,4+1+2*landmarks_per_face+1=box+obj+landmarks+cls) - int landmarks_per_face; - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - // im_info为预处理过程保存的数据,在后处理中需要用到 - bool Preprocess(Mat* mat, FDTensor* outputs, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - // im_info 为预处理记录的信息,后处理用于还原box - // conf_threshold 后处理时过滤box的置信度阈值 - // nms_iou_threshold 后处理时NMS设定的iou阈值 - bool Postprocess(FDTensor& infer_result, FaceDetectionResult* result, - const std::map>& im_info, - float conf_threshold, float nms_iou_threshold); - - // 查看输入是否为动态维度的 不建议直接使用 不同模型的逻辑可能不一致 - bool IsDynamicInput() const { return is_dynamic_input_; } - - bool is_dynamic_input_; -}; - -} // namespace facedet -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc b/csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc deleted file mode 100644 index b843d4a9f..000000000 --- a/csrcs/fastdeploy/vision/facedet/contrib/yolov5face_pybind.cc +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindYOLOv5Face(pybind11::module& m) { - pybind11::class_(m, - "YOLOv5Face") - .def(pybind11::init()) - .def("predict", - [](vision::facedet::YOLOv5Face& self, pybind11::array& data, - float conf_threshold, float nms_iou_threshold) { - auto mat = PyArrayToCvMat(data); - vision::FaceDetectionResult res; - self.Predict(&mat, &res, conf_threshold, nms_iou_threshold); - return res; - }) - .def_readwrite("size", &vision::facedet::YOLOv5Face::size) - .def_readwrite("padding_value", - &vision::facedet::YOLOv5Face::padding_value) - .def_readwrite("is_mini_pad", &vision::facedet::YOLOv5Face::is_mini_pad) - .def_readwrite("is_no_pad", &vision::facedet::YOLOv5Face::is_no_pad) - .def_readwrite("is_scale_up", &vision::facedet::YOLOv5Face::is_scale_up) - .def_readwrite("stride", &vision::facedet::YOLOv5Face::stride) - .def_readwrite("landmarks_per_face", - &vision::facedet::YOLOv5Face::landmarks_per_face); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/facedet/facedet_pybind.cc b/csrcs/fastdeploy/vision/facedet/facedet_pybind.cc deleted file mode 100644 index 3d9a812af..000000000 --- a/csrcs/fastdeploy/vision/facedet/facedet_pybind.cc +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindRetinaFace(pybind11::module& m); -void BindUltraFace(pybind11::module& m); -void BindYOLOv5Face(pybind11::module& m); -void BindSCRFD(pybind11::module& m); - -void BindFaceDet(pybind11::module& m) { - auto facedet_module = m.def_submodule("facedet", "Face detection models."); - BindRetinaFace(facedet_module); - BindUltraFace(facedet_module); - BindYOLOv5Face(facedet_module); - BindSCRFD(facedet_module); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/arcface.cc b/csrcs/fastdeploy/vision/faceid/contrib/arcface.cc deleted file mode 100644 index 9c2b64763..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/arcface.cc +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/faceid/contrib/arcface.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -ArcFace::ArcFace(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) - : InsightFaceRecognitionModel(model_file, params_file, custom_option, - model_format) { - initialized = Initialize(); -} - -bool ArcFace::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend - if (initialized) { - // (1.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; - } - // (2) 如果父类没有初始化backend - if (!InsightFaceRecognitionModel::Initialize()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // (2.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; -} - -bool ArcFace::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Preprocess(mat, output); -} - -bool ArcFace::Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Postprocess(infer_result, result); -} - -bool ArcFace::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess - return InsightFaceRecognitionModel::Predict(im, result); -} - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/faceid/contrib/arcface.h b/csrcs/fastdeploy/vision/faceid/contrib/arcface.h deleted file mode 100644 index 698fadceb..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/arcface.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" -#include "fastdeploy/vision/faceid/contrib/insightface_rec.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -class FASTDEPLOY_DECL ArcFace : public InsightFaceRecognitionModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // ArcFace支持IResNet, IResNet2060, VIT, MobileFaceNet骨干 - ArcFace(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const override { - return "deepinsight/insightface/recognition/arcface_pytorch"; - } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize() override; - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - bool Preprocess(Mat* mat, FDTensor* output) override; - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - bool Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) override; -}; - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc deleted file mode 100644 index cd9bf7c57..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/arcface_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindArcFace(pybind11::module& m) { - // Bind ArcFace - pybind11::class_(m, "ArcFace") - .def(pybind11::init()) - .def("predict", - [](vision::faceid::ArcFace& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::FaceRecognitionResult res; - self.Predict(&mat, &res); - return res; - }) - .def_readwrite("size", &vision::faceid::ArcFace::size) - .def_readwrite("alpha", &vision::faceid::ArcFace::alpha) - .def_readwrite("beta", &vision::faceid::ArcFace::beta) - .def_readwrite("swap_rb", &vision::faceid::ArcFace::swap_rb) - .def_readwrite("l2_normalize", &vision::faceid::ArcFace::l2_normalize); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/cosface.cc b/csrcs/fastdeploy/vision/faceid/contrib/cosface.cc deleted file mode 100644 index 4a4d6dc55..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/cosface.cc +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/faceid/contrib/cosface.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -CosFace::CosFace(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) - : InsightFaceRecognitionModel(model_file, params_file, custom_option, - model_format) { - initialized = Initialize(); -} - -bool CosFace::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend - if (initialized) { - // (1.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; - } - // (2) 如果父类没有初始化backend - if (!InsightFaceRecognitionModel::Initialize()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // (2.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; -} - -bool CosFace::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Preprocess(mat, output); -} - -bool CosFace::Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Postprocess(infer_result, result); -} - -bool CosFace::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess - return InsightFaceRecognitionModel::Predict(im, result); -} - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/faceid/contrib/cosface.h b/csrcs/fastdeploy/vision/faceid/contrib/cosface.h deleted file mode 100644 index 92704536c..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/cosface.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" -#include "fastdeploy/vision/faceid/contrib/insightface_rec.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -class FASTDEPLOY_DECL CosFace : public InsightFaceRecognitionModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // ArcFace支持IResNet, IResNet2060, VIT, MobileFaceNet骨干 - CosFace(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - // insightface/arcface提供的模型文件包含了cosface - std::string ModelName() const override { - return "deepinsight/insightface/recognition/arcface_pytorch"; - } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize() override; - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - bool Preprocess(Mat* mat, FDTensor* output) override; - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - bool Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) override; -}; - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc deleted file mode 100644 index c09f9e723..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/cosface_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindCosFace(pybind11::module& m) { - // Bind CosFace - pybind11::class_(m, "CosFace") - .def(pybind11::init()) - .def("predict", - [](vision::faceid::CosFace& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::FaceRecognitionResult res; - self.Predict(&mat, &res); - return res; - }) - .def_readwrite("size", &vision::faceid::CosFace::size) - .def_readwrite("alpha", &vision::faceid::CosFace::alpha) - .def_readwrite("beta", &vision::faceid::CosFace::beta) - .def_readwrite("swap_rb", &vision::faceid::CosFace::swap_rb) - .def_readwrite("l2_normalize", &vision::faceid::CosFace::l2_normalize); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc b/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc deleted file mode 100644 index ddd7520d4..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.cc +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/faceid/contrib/insightface_rec.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -InsightFaceRecognitionModel::InsightFaceRecognitionModel( - const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool InsightFaceRecognitionModel::Initialize() { - // parameters for preprocess - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - return true; -} - -bool InsightFaceRecognitionModel::Preprocess(Mat* mat, FDTensor* output) { - // face recognition model's preprocess steps in insightface - // reference: insightface/recognition/arcface_torch/inference.py - // 1. Resize - // 2. BGR2RGB - // 3. Convert(opencv style) or Normalize - // 4. HWC2CHW - int resize_w = size[0]; - int resize_h = size[1]; - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (swap_rb) { - BGR2RGB::Run(mat); - } - - Convert::Run(mat, alpha, beta); - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool InsightFaceRecognitionModel::Postprocess( - std::vector& infer_result, FaceRecognitionResult* result) { - FDASSERT((infer_result.size() == 1), - "The default number of output tensor must be 1 according to " - "insightface."); - FDTensor& embedding_tensor = infer_result.at(0); - FDASSERT((embedding_tensor.shape[0] == 1), "Only support batch =1 now."); - if (embedding_tensor.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - - result->Clear(); - result->Resize(embedding_tensor.Numel()); - // Copy the raw embedding vector directly without L2 normalize - // post process. Let the user decide whether to normalize or not. - // Will call utils::L2Normlize() method to perform L2 - // normalize if l2_normalize was set as 'true'. - std::memcpy(result->embedding.data(), embedding_tensor.Data(), - embedding_tensor.Nbytes()); - if (l2_normalize) { - auto norm_embedding = utils::L2Normalize(result->embedding); - std::memcpy(result->embedding.data(), norm_embedding.data(), - embedding_tensor.Nbytes()); - } - return true; -} - -bool InsightFaceRecognitionModel::Predict(cv::Mat* im, - FaceRecognitionResult* result) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - if (!Preprocess(&mat, &input_tensors[0])) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors, result)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h b/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h deleted file mode 100644 index b8eb27262..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -class FASTDEPLOY_DECL InsightFaceRecognitionModel : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // 支持insightface/recognition人脸识别模型的基类 - InsightFaceRecognitionModel( - const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - virtual std::string ModelName() const { return "deepinsight/insightface"; } - - // 以下为一些可供用户修改的属性 - // tuple of (width, height), default (112, 112) - std::vector size; - // 归一化的 alpha 和 beta,x'=x*alpha+beta - std::vector alpha; - std::vector beta; - // whether to swap the B and R channel, such as BGR->RGB, default true. - bool swap_rb; - // whether to apply l2 normalize to embedding values, default; - bool l2_normalize; - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - virtual bool Predict(cv::Mat* im, FaceRecognitionResult* result); - - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - virtual bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - virtual bool Preprocess(Mat* mat, FDTensor* output); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - virtual bool Postprocess(std::vector& infer_result, - FaceRecognitionResult* result); -}; - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc deleted file mode 100644 index 78df369bb..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/insightface_rec_pybind.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindInsightFaceRecognitionModel(pybind11::module& m) { - // Bind InsightFaceRecognitionModel - pybind11::class_(m, "InsightFaceRecognitionModel") - .def(pybind11::init()) - .def("predict", - [](vision::faceid::InsightFaceRecognitionModel& self, - pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::FaceRecognitionResult res; - self.Predict(&mat, &res); - return res; - }) - .def_readwrite("size", &vision::faceid::InsightFaceRecognitionModel::size) - .def_readwrite("alpha", - &vision::faceid::InsightFaceRecognitionModel::alpha) - .def_readwrite("beta", &vision::faceid::InsightFaceRecognitionModel::beta) - .def_readwrite("swap_rb", - &vision::faceid::InsightFaceRecognitionModel::swap_rb) - .def_readwrite( - "l2_normalize", - &vision::faceid::InsightFaceRecognitionModel::l2_normalize); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc b/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc deleted file mode 100644 index 8f13226cb..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.cc +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/faceid/contrib/partial_fc.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -PartialFC::PartialFC(const std::string& model_file, - const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) - : InsightFaceRecognitionModel(model_file, params_file, custom_option, - model_format) { - initialized = Initialize(); -} - -bool PartialFC::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend - if (initialized) { - // (1.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; - } - // (2) 如果父类没有初始化backend - if (!InsightFaceRecognitionModel::Initialize()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // (2.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; -} - -bool PartialFC::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Preprocess(mat, output); -} - -bool PartialFC::Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Postprocess(infer_result, result); -} - -bool PartialFC::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess - return InsightFaceRecognitionModel::Predict(im, result); -} - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h b/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h deleted file mode 100644 index 88a1f2a2a..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" -#include "fastdeploy/vision/faceid/contrib/insightface_rec.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -class FASTDEPLOY_DECL PartialFC : public InsightFaceRecognitionModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - PartialFC(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const override { - return "deepinsight/insightface/recognition/partial_fc"; - } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize() override; - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - bool Preprocess(Mat* mat, FDTensor* output) override; - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - bool Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) override; -}; - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc deleted file mode 100644 index b8cb31358..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/partial_fc_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindPartialFC(pybind11::module& m) { - // Bind Partial FC - pybind11::class_(m, "PartialFC") - .def(pybind11::init()) - .def("predict", - [](vision::faceid::PartialFC& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::FaceRecognitionResult res; - self.Predict(&mat, &res); - return res; - }) - .def_readwrite("size", &vision::faceid::PartialFC::size) - .def_readwrite("alpha", &vision::faceid::PartialFC::alpha) - .def_readwrite("beta", &vision::faceid::PartialFC::beta) - .def_readwrite("swap_rb", &vision::faceid::PartialFC::swap_rb) - .def_readwrite("l2_normalize", &vision::faceid::PartialFC::l2_normalize); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/vpl.cc b/csrcs/fastdeploy/vision/faceid/contrib/vpl.cc deleted file mode 100644 index bb34d3993..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/vpl.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/faceid/contrib/vpl.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -VPL::VPL(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, const Frontend& model_format) - : InsightFaceRecognitionModel(model_file, params_file, custom_option, - model_format) { - initialized = Initialize(); -} - -bool VPL::Initialize() { - // 如果初始化有变化 修改该子类函数 - // 这里需要判断backend是否已经initialized,如果是,则不应该再调用 - // InsightFaceRecognitionModel::Initialize() - // 因为该函数会对backend进行初始化, backend已经在父类的构造函数初始化 - // 这里只修改一些模型相关的属性 - - // (1) 如果父类初始化了backend - if (initialized) { - // (1.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; - } - // (2) 如果父类没有初始化backend - if (!InsightFaceRecognitionModel::Initialize()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - // (2.1) re-init parameters for specific sub-classes - size = {112, 112}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - l2_normalize = false; - return true; -} - -bool VPL::Preprocess(Mat* mat, FDTensor* output) { - // 如果预处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Preprocess(mat, output); -} - -bool VPL::Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) { - // 如果后处理有变化 修改该子类函数 - return InsightFaceRecognitionModel::Postprocess(infer_result, result); -} - -bool VPL::Predict(cv::Mat* im, FaceRecognitionResult* result) { - // 如果前后处理有变化 则override子类的Preprocess和Postprocess - // 如果前后处理有变化 此处应该调用子类自己的Preprocess和Postprocess - return InsightFaceRecognitionModel::Predict(im, result); -} - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/faceid/contrib/vpl.h b/csrcs/fastdeploy/vision/faceid/contrib/vpl.h deleted file mode 100644 index 696d13ac3..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/vpl.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" -#include "fastdeploy/vision/faceid/contrib/insightface_rec.h" - -namespace fastdeploy { - -namespace vision { - -namespace faceid { - -class FASTDEPLOY_DECL VPL : public InsightFaceRecognitionModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - // VPL支持IResNet, IResNet1024骨干 - VPL(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const override { - return "deepinsight/insightface/recognition/vpl"; - } - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - bool Predict(cv::Mat* im, FaceRecognitionResult* result) override; - // 父类中包含 size, alpha, beta, swap_rb, l2_normalize 等基本可配置属性 - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize() override; - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - bool Preprocess(Mat* mat, FDTensor* output) override; - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - bool Postprocess(std::vector& infer_result, - FaceRecognitionResult* result) override; -}; - -} // namespace faceid -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc b/csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc deleted file mode 100644 index 448cf3d3b..000000000 --- a/csrcs/fastdeploy/vision/faceid/contrib/vpl_pybind.cc +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindVPL(pybind11::module& m) { - // Bind VPL - pybind11::class_(m, "VPL") - .def(pybind11::init()) - .def("predict", - [](vision::faceid::VPL& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::FaceRecognitionResult res; - self.Predict(&mat, &res); - return res; - }) - .def_readwrite("size", &vision::faceid::VPL::size) - .def_readwrite("alpha", &vision::faceid::VPL::alpha) - .def_readwrite("beta", &vision::faceid::VPL::beta) - .def_readwrite("swap_rb", &vision::faceid::VPL::swap_rb) - .def_readwrite("l2_normalize", &vision::faceid::VPL::l2_normalize); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/faceid/faceid_pybind.cc b/csrcs/fastdeploy/vision/faceid/faceid_pybind.cc deleted file mode 100644 index 40a1c6727..000000000 --- a/csrcs/fastdeploy/vision/faceid/faceid_pybind.cc +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindArcFace(pybind11::module& m); -void BindInsightFaceRecognitionModel(pybind11::module& m); -void BindCosFace(pybind11::module& m); -void BindPartialFC(pybind11::module& m); -void BindVPL(pybind11::module& m); - -void BindFaceId(pybind11::module& m) { - auto faceid_module = m.def_submodule("faceid", "Face recognition models."); - BindInsightFaceRecognitionModel(faceid_module); - BindArcFace(faceid_module); - BindCosFace(faceid_module); - BindPartialFC(faceid_module); - BindVPL(faceid_module); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/matting/contrib/modnet.cc b/csrcs/fastdeploy/vision/matting/contrib/modnet.cc deleted file mode 100644 index b98d055e3..000000000 --- a/csrcs/fastdeploy/vision/matting/contrib/modnet.cc +++ /dev/null @@ -1,175 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/matting/contrib/modnet.h" -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { - -namespace vision { - -namespace matting { - -MODNet::MODNet(const std::string& model_file, const std::string& params_file, - const RuntimeOption& custom_option, - const Frontend& model_format) { - if (model_format == Frontend::ONNX) { - valid_cpu_backends = {Backend::ORT}; // 指定可用的CPU后端 - valid_gpu_backends = {Backend::ORT, Backend::TRT}; // 指定可用的GPU后端 - } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; - } - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool MODNet::Initialize() { - // parameters for preprocess - size = {256, 256}; - alpha = {1.f / 127.5f, 1.f / 127.5f, 1.f / 127.5f}; - beta = {-1.f, -1.f, -1.f}; // RGB - swap_rb = true; - - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - return true; -} - -bool MODNet::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - // 1. Resize - // 2. BGR2RGB - // 3. Convert(opencv style) or Normalize - // 4. HWC2CHW - int resize_w = size[0]; - int resize_h = size[1]; - if (resize_h != mat->Height() || resize_w != mat->Width()) { - Resize::Run(mat, resize_w, resize_h); - } - if (swap_rb) { - BGR2RGB::Run(mat); - } - - Convert::Run(mat, alpha, beta); - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {mat->Height(), mat->Width()}; - - HWC2CHW::Run(mat); - Cast::Run(mat, "float"); - - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); // reshape to n, h, w, c - return true; -} - -bool MODNet::Postprocess( - std::vector& infer_result, MattingResult* result, - const std::map>& im_info) { - FDASSERT((infer_result.size() == 1), - "The default number of output tensor must be 1 according to " - "modnet."); - FDTensor& alpha_tensor = infer_result.at(0); // (1,h,w,1) - FDASSERT((alpha_tensor.shape[0] == 1), "Only support batch =1 now."); - if (alpha_tensor.dtype != FDDataType::FP32) { - FDERROR << "Only support post process with float32 data." << std::endl; - return false; - } - - // 先获取alpha并resize (使用opencv) - auto iter_ipt = im_info.find("input_shape"); - auto iter_out = im_info.find("output_shape"); - FDASSERT(iter_out != im_info.end() && iter_ipt != im_info.end(), - "Cannot find input_shape or output_shape from im_info."); - int out_h = iter_out->second[0]; - int out_w = iter_out->second[1]; - int ipt_h = iter_ipt->second[0]; - int ipt_w = iter_ipt->second[1]; - - // TODO: 需要修改成FDTensor或Mat的运算 现在依赖cv::Mat - float* alpha_ptr = static_cast(alpha_tensor.Data()); - cv::Mat alpha_zero_copy_ref(out_h, out_w, CV_32FC1, alpha_ptr); - Mat alpha_resized(alpha_zero_copy_ref); // ref-only, zero copy. - if ((out_h != ipt_h) || (out_w != ipt_w)) { - // already allocated a new continuous memory after resize. - // cv::resize(alpha_resized, alpha_resized, cv::Size(ipt_w, ipt_h)); - Resize::Run(&alpha_resized, ipt_w, ipt_h, -1, -1); - } - - result->Clear(); - // note: must be setup shape before Resize - result->contain_foreground = false; - // 和输入原图大小对应的alpha - result->shape = {static_cast(ipt_h), static_cast(ipt_w)}; - int numel = ipt_h * ipt_w; - int nbytes = numel * sizeof(float); - result->Resize(numel); - std::memcpy(result->alpha.data(), alpha_resized.GetCpuMat()->data, nbytes); - return true; -} - -bool MODNet::Predict(cv::Mat* im, MattingResult* result) { -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_START(0) -#endif - - Mat mat(*im); - std::vector input_tensors(1); - - std::map> im_info; - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {mat.Height(), mat.Width()}; - im_info["output_shape"] = {mat.Height(), mat.Width()}; - - if (!Preprocess(&mat, &input_tensors[0], &im_info)) { - FDERROR << "Failed to preprocess input image." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(0, "Preprocess") - TIMERECORD_START(1) -#endif - - input_tensors[0].name = InputInfoOfRuntime(0).name; - std::vector output_tensors; - if (!Infer(input_tensors, &output_tensors)) { - FDERROR << "Failed to inference." << std::endl; - return false; - } -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(1, "Inference") - TIMERECORD_START(2) -#endif - - if (!Postprocess(output_tensors, result, im_info)) { - FDERROR << "Failed to post process." << std::endl; - return false; - } - -#ifdef FASTDEPLOY_DEBUG - TIMERECORD_END(2, "Postprocess") -#endif - return true; -} - -} // namespace matting -} // namespace vision -} // namespace fastdeploy \ No newline at end of file diff --git a/csrcs/fastdeploy/vision/matting/contrib/modnet.h b/csrcs/fastdeploy/vision/matting/contrib/modnet.h deleted file mode 100644 index 047fd3aea..000000000 --- a/csrcs/fastdeploy/vision/matting/contrib/modnet.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { - -namespace vision { - -namespace matting { - -class FASTDEPLOY_DECL MODNet : public FastDeployModel { - public: - // 当model_format为ONNX时,无需指定params_file - // 当model_format为Paddle时,则需同时指定model_file & params_file - MODNet(const std::string& model_file, const std::string& params_file = "", - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); - - // 定义模型的名称 - std::string ModelName() const { return "matting/MODNet"; } - - // 以下为一些可供用户修改的属性 - // tuple of (width, height), default (256, 256) - std::vector size; - // 归一化的 alpha 和 beta,x'=x*alpha+beta - std::vector alpha; - std::vector beta; - // whether to swap the B and R channel, such as BGR->RGB, default true. - bool swap_rb; - - // 模型预测接口,即用户调用的接口 - // im 为用户的输入数据,目前对于CV均定义为cv::Mat - // result 为模型预测的输出结构体 - bool Predict(cv::Mat* im, MattingResult* result); - - private: - // 初始化函数,包括初始化后端,以及其它模型推理需要涉及的操作 - bool Initialize(); - - // 输入图像预处理操作 - // Mat为FastDeploy定义的数据结构 - // FDTensor为预处理后的Tensor数据,传给后端进行推理 - bool Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info); - - // 后端推理结果后处理,输出给用户 - // infer_result 为后端推理后的输出Tensor - // result 为模型预测的结果 - bool Postprocess(std::vector& infer_result, MattingResult* result, - const std::map>& im_info); -}; - -} // namespace matting -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc b/csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc deleted file mode 100644 index bfb8b1f88..000000000 --- a/csrcs/fastdeploy/vision/matting/contrib/modnet_pybind.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindMODNet(pybind11::module& m) { - // Bind MODNet - pybind11::class_(m, "MODNet") - .def(pybind11::init()) - .def("predict", - [](vision::matting::MODNet& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::MattingResult res; - self.Predict(&mat, &res); - return res; - }) - .def_readwrite("size", &vision::matting::MODNet::size) - .def_readwrite("alpha", &vision::matting::MODNet::alpha) - .def_readwrite("beta", &vision::matting::MODNet::beta) - .def_readwrite("swap_rb", &vision::matting::MODNet::swap_rb); -} - -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/matting/matting_pybind.cc b/csrcs/fastdeploy/vision/matting/matting_pybind.cc deleted file mode 100644 index e5fd78925..000000000 --- a/csrcs/fastdeploy/vision/matting/matting_pybind.cc +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindMODNet(pybind11::module& m); - -void BindMatting(pybind11::module& m) { - auto matting_module = - m.def_submodule("matting", "Image object matting models."); - BindMODNet(matting_module); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/ppcls/model.cc b/csrcs/fastdeploy/vision/ppcls/model.cc deleted file mode 100644 index c4e5b767c..000000000 --- a/csrcs/fastdeploy/vision/ppcls/model.cc +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/ppcls/model.h" -#include "fastdeploy/vision/utils/utils.h" -#include "yaml-cpp/yaml.h" - -namespace fastdeploy { -namespace vision { -namespace ppcls { - -Model::Model(const std::string& model_file, const std::string& params_file, - const std::string& config_file, const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::ORT, Backend::PDINFER}; - valid_gpu_backends = {Backend::ORT, Backend::PDINFER}; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool Model::Initialize() { - if (!BuildPreprocessPipelineFromConfig()) { - FDERROR << "Failed to build preprocess pipeline from configuration file." - << std::endl; - return false; - } - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - return true; -} - -bool Model::BuildPreprocessPipelineFromConfig() { - processors_.clear(); - YAML::Node cfg; - try { - cfg = YAML::LoadFile(config_file_); - } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file_ - << ", maybe you should check this file." << std::endl; - return false; - } - auto preprocess_cfg = cfg["PreProcess"]["transform_ops"]; - processors_.push_back(std::make_shared()); - for (const auto& op : preprocess_cfg) { - FDASSERT(op.IsMap(), - "Require the transform information in yaml be Map type."); - auto op_name = op.begin()->first.as(); - if (op_name == "ResizeImage") { - int target_size = op.begin()->second["resize_short"].as(); - bool use_scale = false; - int interp = 1; - processors_.push_back( - std::make_shared(target_size, 1, use_scale)); - } else if (op_name == "CropImage") { - int width = op.begin()->second["size"].as(); - int height = op.begin()->second["size"].as(); - processors_.push_back(std::make_shared(width, height)); - } else if (op_name == "NormalizeImage") { - auto mean = op.begin()->second["mean"].as>(); - auto std = op.begin()->second["std"].as>(); - auto scale = op.begin()->second["scale"].as(); - FDASSERT((scale - 0.00392157) < 1e-06 && (scale - 0.00392157) > -1e-06, - "Only support scale in Normalize be 0.00392157, means the pixel " - "is in range of [0, 255]."); - processors_.push_back(std::make_shared(mean, std)); - } else if (op_name == "ToCHWImage") { - processors_.push_back(std::make_shared()); - } else { - FDERROR << "Unexcepted preprocess operator: " << op_name << "." - << std::endl; - return false; - } - } - return true; -} - -bool Model::Preprocess(Mat* mat, FDTensor* output) { - for (size_t i = 0; i < processors_.size(); ++i) { - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - } - - int channel = mat->Channels(); - int width = mat->Width(); - int height = mat->Height(); - output->name = InputInfoOfRuntime(0).name; - output->SetExternalData({1, channel, height, width}, FDDataType::FP32, - mat->GetCpuMat()->ptr()); - return true; -} - -bool Model::Postprocess(const FDTensor& infer_result, ClassifyResult* result, - int topk) { - int num_classes = infer_result.shape[1]; - const float* infer_result_buffer = - reinterpret_cast(infer_result.data.data()); - topk = std::min(num_classes, topk); - result->label_ids = - utils::TopKIndices(infer_result_buffer, num_classes, topk); - result->scores.resize(topk); - for (int i = 0; i < topk; ++i) { - result->scores[i] = *(infer_result_buffer + result->label_ids[i]); - } - return true; -} - -bool Model::Predict(cv::Mat* im, ClassifyResult* result, int topk) { - Mat mat(*im); - std::vector processed_data(1); - if (!Preprocess(&mat, &(processed_data[0]))) { - FDERROR << "Failed to preprocess input data while using model:" - << ModelName() << "." << std::endl; - return false; - } - - std::vector infer_result(1); - if (!Infer(processed_data, &infer_result)) { - FDERROR << "Failed to inference while using model:" << ModelName() << "." - << std::endl; - return false; - } - - if (!Postprocess(infer_result[0], result, topk)) { - FDERROR << "Failed to postprocess while using model:" << ModelName() << "." - << std::endl; - return false; - } - return true; -} - -} // namespace ppcls -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/ppcls/model.h b/csrcs/fastdeploy/vision/ppcls/model.h deleted file mode 100644 index 71800a7d7..000000000 --- a/csrcs/fastdeploy/vision/ppcls/model.h +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace ppcls { - -class FASTDEPLOY_DECL Model : public FastDeployModel { - public: - Model(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - std::string ModelName() const { return "ppclas-classify"; } - - // TODO(jiangjiajun) Batch is on the way - virtual bool Predict(cv::Mat* im, ClassifyResult* result, int topk = 1); - - private: - bool Initialize(); - - bool BuildPreprocessPipelineFromConfig(); - - bool Preprocess(Mat* mat, FDTensor* outputs); - - bool Postprocess(const FDTensor& infer_result, ClassifyResult* result, - int topk = 1); - - std::vector> processors_; - std::string config_file_; -}; -} // namespace ppcls -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc b/csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc deleted file mode 100644 index 10ff5ee10..000000000 --- a/csrcs/fastdeploy/vision/ppcls/ppcls_pybind.cc +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindPPCls(pybind11::module& m) { - auto ppcls_module = m.def_submodule("ppcls", "Module to deploy PaddleClas."); - pybind11::class_(ppcls_module, "Model") - .def(pybind11::init()) - .def("predict", - [](vision::ppcls::Model& self, pybind11::array& data, int topk = 1) { - auto mat = PyArrayToCvMat(data); - vision::ClassifyResult res; - self.Predict(&mat, &res, topk); - return res; - }); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/ppseg/model.cc b/csrcs/fastdeploy/vision/ppseg/model.cc deleted file mode 100644 index 7f692c6a7..000000000 --- a/csrcs/fastdeploy/vision/ppseg/model.cc +++ /dev/null @@ -1,232 +0,0 @@ -#include "fastdeploy/vision/ppseg/model.h" -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" -#include "yaml-cpp/yaml.h" - -namespace fastdeploy { -namespace vision { -namespace ppseg { - -Model::Model(const std::string& model_file, const std::string& params_file, - const std::string& config_file, const RuntimeOption& custom_option, - const Frontend& model_format) { - config_file_ = config_file; - valid_cpu_backends = {Backend::PDINFER, Backend::ORT}; - valid_gpu_backends = {Backend::PDINFER, Backend::ORT}; - runtime_option = custom_option; - runtime_option.model_format = model_format; - runtime_option.model_file = model_file; - runtime_option.params_file = params_file; - initialized = Initialize(); -} - -bool Model::Initialize() { - if (!BuildPreprocessPipelineFromConfig()) { - FDERROR << "Failed to build preprocess pipeline from configuration file." - << std::endl; - return false; - } - if (!InitRuntime()) { - FDERROR << "Failed to initialize fastdeploy backend." << std::endl; - return false; - } - return true; -} - -bool Model::BuildPreprocessPipelineFromConfig() { - processors_.clear(); - YAML::Node cfg; - processors_.push_back(std::make_shared()); - try { - cfg = YAML::LoadFile(config_file_); - } catch (YAML::BadFile& e) { - FDERROR << "Failed to load yaml file " << config_file_ - << ", maybe you should check this file." << std::endl; - return false; - } - - if (cfg["Deploy"]["transforms"]) { - auto preprocess_cfg = cfg["Deploy"]["transforms"]; - for (const auto& op : preprocess_cfg) { - FDASSERT(op.IsMap(), - "Require the transform information in yaml be Map type."); - if (op["type"].as() == "Normalize") { - std::vector mean = {0.5, 0.5, 0.5}; - std::vector std = {0.5, 0.5, 0.5}; - if (op["mean"]) { - mean = op["mean"].as>(); - } - if (op["std"]) { - std = op["std"].as>(); - } - processors_.push_back(std::make_shared(mean, std)); - - } else if (op["type"].as() == "Resize") { - const auto& target_size = op["target_size"]; - int resize_width = target_size[0].as(); - int resize_height = target_size[1].as(); - is_resized = true; - processors_.push_back( - std::make_shared(resize_width, resize_height)); - } - } - processors_.push_back(std::make_shared()); - } - return true; -} - -bool Model::Preprocess(Mat* mat, FDTensor* output, - std::map>* im_info) { - for (size_t i = 0; i < processors_.size(); ++i) { - if (processors_[i]->Name().compare("Resize") == 0) { - auto processor = dynamic_cast(processors_[i].get()); - int resize_width = -1; - int resize_height = -1; - std::tie(resize_width, resize_height) = processor->GetWidthAndHeight(); - if (is_vertical_screen && (resize_width > resize_height)) { - if (processor->SetWidthAndHeight(resize_height, resize_width)) { - FDERROR << "Failed to set Resize processor width and height " - << processors_[i]->Name() << "." << std::endl; - } - } - } - if (!(*(processors_[i].get()))(mat)) { - FDERROR << "Failed to process image data in " << processors_[i]->Name() - << "." << std::endl; - return false; - } - } - - // Record output shape of preprocessed image - (*im_info)["output_shape"] = {static_cast(mat->Height()), - static_cast(mat->Width())}; - - mat->ShareWithTensor(output); - output->shape.insert(output->shape.begin(), 1); - output->name = InputInfoOfRuntime(0).name; - return true; -} - -bool Model::Postprocess(FDTensor& infer_result, SegmentationResult* result, - std::map>* im_info) { - // PaddleSeg has three types of inference output: - // 1. output with argmax and without softmax. 3-D matrix CHW, Channel - // always 1, the element in matrix is classified label_id INT64 Type. - // 2. output without argmax and without softmax. 4-D matrix NCHW, N always - // 1, Channel is the num of classes. The element is the logits of classes - // FP32 - // 3. output without argmax and with softmax. 4-D matrix NCHW, the result - // of 2 with softmax layer - // Fastdeploy output: - // 1. label_map - // 2. score_map(optional) - // 3. shape: 2-D HW - FDASSERT(infer_result.dtype == FDDataType::INT64 || - infer_result.dtype == FDDataType::FP32, - "Require the data type of output is int64 or fp32, but now it's " + - Str(infer_result.dtype) + "."); - result->Clear(); - - if (infer_result.shape.size() == 4) { - FDASSERT(infer_result.shape[0] == 1, "Only support batch size = 1."); - // output without argmax - result->contain_score_map = true; - utils::NCHW2NHWC(infer_result); - } - - // for resize mat below - FDTensor new_infer_result; - Mat* mat = nullptr; - if (is_resized) { - cv::Mat temp_mat; - utils::FDTensor2FP32CVMat(temp_mat, infer_result, - result->contain_score_map); - - // original image shape - auto iter_ipt = (*im_info).find("input_shape"); - FDASSERT(iter_ipt != im_info->end(), - "Cannot find input_shape from im_info."); - int ipt_h = iter_ipt->second[0]; - int ipt_w = iter_ipt->second[1]; - - mat = new Mat(temp_mat); - - Resize::Run(mat, ipt_w, ipt_h, -1, -1, 1); - mat->ShareWithTensor(&new_infer_result); - new_infer_result.shape.insert(new_infer_result.shape.begin(), 1); - result->shape = new_infer_result.shape; - } else { - result->shape = infer_result.shape; - } - int out_num = - std::accumulate(result->shape.begin(), result->shape.begin() + 3, 1, - std::multiplies()); - // NCHW remove N or CHW remove C - result->shape.erase(result->shape.begin()); - result->Resize(out_num); - if (result->contain_score_map) { - // output with label_map and score_map - float_t* infer_result_buffer = nullptr; - if (is_resized) { - infer_result_buffer = static_cast(new_infer_result.Data()); - } else { - infer_result_buffer = static_cast(infer_result.Data()); - } - // argmax - utils::ArgmaxScoreMap(infer_result_buffer, result, with_softmax); - result->shape.erase(result->shape.begin() + 2); - } else { - // output only with label_map - if (is_resized) { - float_t* infer_result_buffer = - static_cast(new_infer_result.Data()); - for (int i = 0; i < out_num; i++) { - result->label_map[i] = static_cast(*(infer_result_buffer + i)); - } - } else { - const int64_t* infer_result_buffer = - reinterpret_cast(infer_result.Data()); - for (int i = 0; i < out_num; i++) { - result->label_map[i] = static_cast(*(infer_result_buffer + i)); - } - } - } - delete mat; - mat = nullptr; - return true; -} - -bool Model::Predict(cv::Mat* im, SegmentationResult* result) { - Mat mat(*im); - std::vector processed_data(1); - - std::map> im_info; - - // Record the shape of image and the shape of preprocessed image - im_info["input_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - im_info["output_shape"] = {static_cast(mat.Height()), - static_cast(mat.Width())}; - - if (!Preprocess(&mat, &(processed_data[0]), &im_info)) { - FDERROR << "Failed to preprocess input data while using model:" - << ModelName() << "." << std::endl; - return false; - } - std::vector infer_result(1); - if (!Infer(processed_data, &infer_result)) { - FDERROR << "Failed to inference while using model:" << ModelName() << "." - << std::endl; - return false; - } - if (!Postprocess(infer_result[0], result, &im_info)) { - FDERROR << "Failed to postprocess while using model:" << ModelName() << "." - << std::endl; - return false; - } - return true; -} - -} // namespace ppseg -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/ppseg/model.h b/csrcs/fastdeploy/vision/ppseg/model.h deleted file mode 100644 index 72f8dbc64..000000000 --- a/csrcs/fastdeploy/vision/ppseg/model.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#include "fastdeploy/fastdeploy_model.h" -#include "fastdeploy/vision/common/processors/transform.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace ppseg { - -class FASTDEPLOY_DECL Model : public FastDeployModel { - public: - Model(const std::string& model_file, const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option = RuntimeOption(), - const Frontend& model_format = Frontend::PADDLE); - - std::string ModelName() const { return "ppseg"; } - - virtual bool Predict(cv::Mat* im, SegmentationResult* result); - - bool with_softmax = false; - - bool is_vertical_screen = false; - - private: - bool Initialize(); - - bool BuildPreprocessPipelineFromConfig(); - - bool Preprocess(Mat* mat, FDTensor* outputs, - std::map>* im_info); - - bool Postprocess(FDTensor& infer_result, SegmentationResult* result, - std::map>* im_info); - - bool is_resized = false; - - std::vector> processors_; - std::string config_file_; -}; -} // namespace ppseg -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc b/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc deleted file mode 100644 index 949c27487..000000000 --- a/csrcs/fastdeploy/vision/ppseg/ppseg_pybind.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindPPSeg(pybind11::module& m) { - auto ppseg_module = - m.def_submodule("ppseg", "Module to deploy PaddleSegmentation."); - pybind11::class_(ppseg_module, "Model") - .def(pybind11::init()) - .def("predict", - [](vision::ppseg::Model& self, pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::SegmentationResult* res = new vision::SegmentationResult(); - // self.Predict(&mat, &res); - self.Predict(&mat, res); - return res; - }) - .def_readwrite("with_softmax", &vision::ppseg::Model::with_softmax) - .def_readwrite("is_vertical_screen", - &vision::ppseg::Model::is_vertical_screen); -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc b/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc deleted file mode 100644 index fdd110cb8..000000000 --- a/csrcs/fastdeploy/vision/utils/FDTensor2CVMat.cc +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace utils { - -void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result, - bool contain_score_map) { - // output with argmax channel is 1 - int channel = 1; - int height = infer_result.shape[1]; - int width = infer_result.shape[2]; - - if (contain_score_map) { - // output without argmax and convent to NHWC - channel = infer_result.shape[3]; - } - // create FP32 cvmat - if (infer_result.dtype == FDDataType::INT64) { - FDWARNING << "The PaddleSeg model is exported with argmax. Inference " - "result type is " + - Str(infer_result.dtype) + - ". If you want the edge of segmentation image more " - "smoother. Please export model with --without_argmax " - "--with_softmax." - << std::endl; - int64_t chw = channel * height * width; - int64_t* infer_result_buffer = static_cast(infer_result.Data()); - std::vector float_result_buffer(chw); - mat = cv::Mat(height, width, CV_32FC(channel)); - int index = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - mat.at(i, j) = - static_cast(infer_result_buffer[index++]); - } - } - } else if (infer_result.dtype == FDDataType::FP32) { - mat = cv::Mat(height, width, CV_32FC(channel), infer_result.Data()); - } -} - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/cosine_similarity.cc b/csrcs/fastdeploy/vision/utils/cosine_similarity.cc deleted file mode 100644 index 70d4e31dd..000000000 --- a/csrcs/fastdeploy/vision/utils/cosine_similarity.cc +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace utils { - -float CosineSimilarity(const std::vector& a, const std::vector& b, - bool normalized) { - // 计算余弦相似度 - FDASSERT((a.size() == b.size()) && (a.size() != 0), - "The size of a and b must be equal and >= 1."); - size_t num_val = a.size(); - if (normalized) { - float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f; - for (size_t i = 0; i < num_val; ++i) { - mul_a += (a[i] * a[i]); - mul_b += (b[i] * b[i]); - mul_ab += (a[i] * b[i]); - } - return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b))); - } - auto norm_a = L2Normalize(a); - auto norm_b = L2Normalize(b); - float mul_a = 0.f, mul_b = 0.f, mul_ab = 0.f; - for (size_t i = 0; i < num_val; ++i) { - mul_a += (norm_a[i] * norm_a[i]); - mul_b += (norm_b[i] * norm_b[i]); - mul_ab += (norm_a[i] * norm_b[i]); - } - return (mul_ab / (std::sqrt(mul_a) * std::sqrt(mul_b))); -} - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/l2_normalize.cc b/csrcs/fastdeploy/vision/utils/l2_normalize.cc deleted file mode 100644 index f5752b848..000000000 --- a/csrcs/fastdeploy/vision/utils/l2_normalize.cc +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace utils { - -std::vector L2Normalize(const std::vector& values) { - size_t num_val = values.size(); - if (num_val == 0) { - return {}; - } - std::vector norm; - float l2_sum_val = 0.f; - for (size_t i = 0; i < num_val; ++i) { - l2_sum_val += (values[i] * values[i]); - } - float l2_sum_sqrt = std::sqrt(l2_sum_val); - norm.resize(num_val); - for (size_t i = 0; i < num_val; ++i) { - norm[i] = values[i] / l2_sum_sqrt; - } - return norm; -} - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/nms.cc b/csrcs/fastdeploy/vision/utils/nms.cc deleted file mode 100644 index 900acf84d..000000000 --- a/csrcs/fastdeploy/vision/utils/nms.cc +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/utils/perf.h" -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace utils { - -// The implementation refers to -// https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/cpp/src/utils.cc -void NMS(DetectionResult* result, float iou_threshold) { - utils::SortDetectionResult(result); - - std::vector area_of_boxes(result->boxes.size()); - std::vector suppressed(result->boxes.size(), 0); - for (size_t i = 0; i < result->boxes.size(); ++i) { - area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) * - (result->boxes[i][3] - result->boxes[i][1]); - } - - for (size_t i = 0; i < result->boxes.size(); ++i) { - if (suppressed[i] == 1) { - continue; - } - for (size_t j = i + 1; j < result->boxes.size(); ++j) { - if (suppressed[j] == 1) { - continue; - } - float xmin = std::max(result->boxes[i][0], result->boxes[j][0]); - float ymin = std::max(result->boxes[i][1], result->boxes[j][1]); - float xmax = std::min(result->boxes[i][2], result->boxes[j][2]); - float ymax = std::min(result->boxes[i][3], result->boxes[j][3]); - float overlap_w = std::max(0.0f, xmax - xmin); - float overlap_h = std::max(0.0f, ymax - ymin); - float overlap_area = overlap_w * overlap_h; - float overlap_ratio = - overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area); - if (overlap_ratio > iou_threshold) { - suppressed[j] = 1; - } - } - } - DetectionResult backup(*result); - result->Clear(); - result->Reserve(suppressed.size()); - for (size_t i = 0; i < suppressed.size(); ++i) { - if (suppressed[i] == 1) { - continue; - } - result->boxes.emplace_back(backup.boxes[i]); - result->scores.push_back(backup.scores[i]); - result->label_ids.push_back(backup.label_ids[i]); - } -} - -void NMS(FaceDetectionResult* result, float iou_threshold) { - utils::SortDetectionResult(result); - - std::vector area_of_boxes(result->boxes.size()); - std::vector suppressed(result->boxes.size(), 0); - for (size_t i = 0; i < result->boxes.size(); ++i) { - area_of_boxes[i] = (result->boxes[i][2] - result->boxes[i][0]) * - (result->boxes[i][3] - result->boxes[i][1]); - } - - for (size_t i = 0; i < result->boxes.size(); ++i) { - if (suppressed[i] == 1) { - continue; - } - for (size_t j = i + 1; j < result->boxes.size(); ++j) { - if (suppressed[j] == 1) { - continue; - } - float xmin = std::max(result->boxes[i][0], result->boxes[j][0]); - float ymin = std::max(result->boxes[i][1], result->boxes[j][1]); - float xmax = std::min(result->boxes[i][2], result->boxes[j][2]); - float ymax = std::min(result->boxes[i][3], result->boxes[j][3]); - float overlap_w = std::max(0.0f, xmax - xmin); - float overlap_h = std::max(0.0f, ymax - ymin); - float overlap_area = overlap_w * overlap_h; - float overlap_ratio = - overlap_area / (area_of_boxes[i] + area_of_boxes[j] - overlap_area); - if (overlap_ratio > iou_threshold) { - suppressed[j] = 1; - } - } - } - FaceDetectionResult backup(*result); - int landmarks_per_face = result->landmarks_per_face; - - result->Clear(); - // don't forget to reset the landmarks_per_face - // before apply Reserve method. - result->landmarks_per_face = landmarks_per_face; - result->Reserve(suppressed.size()); - for (size_t i = 0; i < suppressed.size(); ++i) { - if (suppressed[i] == 1) { - continue; - } - result->boxes.emplace_back(backup.boxes[i]); - result->scores.push_back(backup.scores[i]); - // landmarks (if have) - if (result->landmarks_per_face > 0) { - for (size_t j = 0; j < result->landmarks_per_face; ++j) { - result->landmarks.emplace_back( - backup.landmarks[i * result->landmarks_per_face + j]); - } - } - } -} - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/sort_det_res.cc b/csrcs/fastdeploy/vision/utils/sort_det_res.cc deleted file mode 100644 index 93dbb6969..000000000 --- a/csrcs/fastdeploy/vision/utils/sort_det_res.cc +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace utils { - -void Merge(DetectionResult* result, size_t low, size_t mid, size_t high) { - std::vector>& boxes = result->boxes; - std::vector& scores = result->scores; - std::vector& label_ids = result->label_ids; - std::vector> temp_boxes(boxes); - std::vector temp_scores(scores); - std::vector temp_label_ids(label_ids); - size_t i = low; - size_t j = mid + 1; - size_t k = i; - for (; i <= mid && j <= high; k++) { - if (temp_scores[i] >= temp_scores[j]) { - scores[k] = temp_scores[i]; - label_ids[k] = temp_label_ids[i]; - boxes[k] = temp_boxes[i]; - i++; - } else { - scores[k] = temp_scores[j]; - label_ids[k] = temp_label_ids[j]; - boxes[k] = temp_boxes[j]; - j++; - } - } - while (i <= mid) { - scores[k] = temp_scores[i]; - label_ids[k] = temp_label_ids[i]; - boxes[k] = temp_boxes[i]; - k++; - i++; - } - while (j <= high) { - scores[k] = temp_scores[j]; - label_ids[k] = temp_label_ids[j]; - boxes[k] = temp_boxes[j]; - k++; - j++; - } -} - -void MergeSort(DetectionResult* result, size_t low, size_t high) { - if (low < high) { - size_t mid = (high - low) / 2 + low; - MergeSort(result, low, mid); - MergeSort(result, mid + 1, high); - Merge(result, low, mid, high); - } -} - -void SortDetectionResult(DetectionResult* result) { - size_t low = 0; - size_t high = result->scores.size(); - if (high == 0) { - return; - } - high = high - 1; - MergeSort(result, low, high); -} - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/sort_face_det_res.cc b/csrcs/fastdeploy/vision/utils/sort_face_det_res.cc deleted file mode 100644 index 34150f9ac..000000000 --- a/csrcs/fastdeploy/vision/utils/sort_face_det_res.cc +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision/utils/utils.h" - -namespace fastdeploy { -namespace vision { -namespace utils { - -void SortDetectionResult(FaceDetectionResult* result) { - // sort face detection results with landmarks or not. - if (result->boxes.size() == 0) { - return; - } - int landmarks_per_face = result->landmarks_per_face; - if (landmarks_per_face > 0) { - FDASSERT( - (result->landmarks.size() == result->boxes.size() * landmarks_per_face), - "The size of landmarks != boxes.size * landmarks_per_face."); - } - - // argsort for scores. - std::vector indices; - indices.resize(result->boxes.size()); - for (size_t i = 0; i < result->boxes.size(); ++i) { - indices[i] = i; - } - std::vector& scores = result->scores; - std::sort(indices.begin(), indices.end(), - [&scores](size_t a, size_t b) { return scores[a] > scores[b]; }); - - // reorder boxes, scores, landmarks (if have). - FaceDetectionResult backup(*result); - result->Clear(); - // don't forget to reset the landmarks_per_face - // before apply Reserve method. - result->landmarks_per_face = landmarks_per_face; - result->Reserve(indices.size()); - if (landmarks_per_face > 0) { - for (size_t i = 0; i < indices.size(); ++i) { - result->boxes.emplace_back(backup.boxes[indices[i]]); - result->scores.push_back(backup.scores[indices[i]]); - for (size_t j = 0; j < landmarks_per_face; ++j) { - result->landmarks.emplace_back( - backup.landmarks[indices[i] * landmarks_per_face + j]); - } - } - } else { - for (size_t i = 0; i < indices.size(); ++i) { - result->boxes.emplace_back(backup.boxes[indices[i]]); - result->scores.push_back(backup.scores[indices[i]]); - } - } -} - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/utils/utils.h b/csrcs/fastdeploy/vision/utils/utils.h deleted file mode 100644 index 02cf16e9c..000000000 --- a/csrcs/fastdeploy/vision/utils/utils.h +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include "fastdeploy/core/fd_tensor.h" -#include "fastdeploy/utils/utils.h" -#include "fastdeploy/vision/common/result.h" - -namespace fastdeploy { -namespace vision { -namespace utils { -// topk sometimes is a very small value -// so this implementation is simple but I don't think it will -// cost too much time -// Also there may be cause problem since we suppose the minimum value is -// -99999999 -// Do not use this function on array which topk contains value less than -// -99999999 -template -std::vector TopKIndices(const T* array, int array_size, int topk) { - topk = std::min(array_size, topk); - std::vector res(topk); - std::set searched; - for (int32_t i = 0; i < topk; ++i) { - T min = -99999999; - for (int32_t j = 0; j < array_size; ++j) { - if (searched.find(j) != searched.end()) { - continue; - } - if (*(array + j) > min) { - res[i] = j; - min = *(array + j); - } - } - searched.insert(res[i]); - } - return res; -} - -template -void ArgmaxScoreMap(T infer_result_buffer, SegmentationResult* result, - bool with_softmax) { - int64_t height = result->shape[0]; - int64_t width = result->shape[1]; - int64_t num_classes = result->shape[2]; - int index = 0; - for (size_t i = 0; i < height; ++i) { - for (size_t j = 0; j < width; ++j) { - int64_t s = (i * width + j) * num_classes; - T max_class_score = std::max_element( - infer_result_buffer + s, infer_result_buffer + s + num_classes); - int label_id = std::distance(infer_result_buffer + s, max_class_score); - if (label_id >= 255) { - FDWARNING << "label_id is stored by uint8_t, now the value is bigger " - "than 255, it's " - << static_cast(label_id) << "." << std::endl; - } - result->label_map[index] = static_cast(label_id); - - if (with_softmax) { - double_t total = 0; - for (int k = 0; k < num_classes; k++) { - total += exp(*(infer_result_buffer + s + k) - *max_class_score); - } - double_t softmax_class_score = 1 / total; - result->score_map[index] = static_cast(softmax_class_score); - - } else { - result->score_map[index] = static_cast(*max_class_score); - } - index++; - } - } -} - -template -void NCHW2NHWC(FDTensor& infer_result) { - T* infer_result_buffer = reinterpret_cast(infer_result.MutableData()); - int num = infer_result.shape[0]; - int channel = infer_result.shape[1]; - int height = infer_result.shape[2]; - int width = infer_result.shape[3]; - int chw = channel * height * width; - int wc = width * channel; - int wh = width * height; - std::vector hwc_data(chw); - int index = 0; - for (int n = 0; n < num; n++) { - for (int c = 0; c < channel; c++) { - for (int h = 0; h < height; h++) { - for (int w = 0; w < width; w++) { - hwc_data[n * chw + h * wc + w * channel + c] = - *(infer_result_buffer + index); - index++; - } - } - } - } - std::memcpy(infer_result.MutableData(), hwc_data.data(), - num * chw * sizeof(T)); - infer_result.shape = {num, height, width, channel}; -} - -void FDTensor2FP32CVMat(cv::Mat& mat, FDTensor& infer_result, - bool contain_score_map); - -void NMS(DetectionResult* output, float iou_threshold = 0.5); - -void NMS(FaceDetectionResult* result, float iou_threshold = 0.5); - -// MergeSort -void SortDetectionResult(DetectionResult* output); - -void SortDetectionResult(FaceDetectionResult* result); - -// L2 Norm / cosine similarity (for face recognition, ...) -FASTDEPLOY_DECL std::vector L2Normalize( - const std::vector& values); - -FASTDEPLOY_DECL float CosineSimilarity(const std::vector& a, - const std::vector& b, - bool normalized = true); - -} // namespace utils -} // namespace vision -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/vision_pybind.cc b/csrcs/fastdeploy/vision/vision_pybind.cc deleted file mode 100644 index 6528dd22b..000000000 --- a/csrcs/fastdeploy/vision/vision_pybind.cc +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { - -void BindPPCls(pybind11::module& m); -void BindPPDet(pybind11::module& m); -void BindPPSeg(pybind11::module& m); - -void BindDetection(pybind11::module& m); -void BindMatting(pybind11::module& m); -void BindFaceDet(pybind11::module& m); -void BindFaceId(pybind11::module& m); -#ifdef ENABLE_VISION_VISUALIZE -void BindVisualize(pybind11::module& m); -#endif - -void BindVision(pybind11::module& m) { - pybind11::class_(m, "ClassifyResult") - .def(pybind11::init()) - .def_readwrite("label_ids", &vision::ClassifyResult::label_ids) - .def_readwrite("scores", &vision::ClassifyResult::scores) - .def("__repr__", &vision::ClassifyResult::Str) - .def("__str__", &vision::ClassifyResult::Str); - - pybind11::class_(m, "DetectionResult") - .def(pybind11::init()) - .def_readwrite("boxes", &vision::DetectionResult::boxes) - .def_readwrite("scores", &vision::DetectionResult::scores) - .def_readwrite("label_ids", &vision::DetectionResult::label_ids) - .def("__repr__", &vision::DetectionResult::Str) - .def("__str__", &vision::DetectionResult::Str); - - pybind11::class_(m, "FaceDetectionResult") - .def(pybind11::init()) - .def_readwrite("boxes", &vision::FaceDetectionResult::boxes) - .def_readwrite("scores", &vision::FaceDetectionResult::scores) - .def_readwrite("landmarks", &vision::FaceDetectionResult::landmarks) - .def_readwrite("landmarks_per_face", - &vision::FaceDetectionResult::landmarks_per_face) - .def("__repr__", &vision::FaceDetectionResult::Str) - .def("__str__", &vision::FaceDetectionResult::Str); - - pybind11::class_(m, "SegmentationResult") - .def(pybind11::init()) - .def_readwrite("label_map", &vision::SegmentationResult::label_map) - .def_readwrite("score_map", &vision::SegmentationResult::score_map) - .def_readwrite("shape", &vision::SegmentationResult::shape) - .def_readwrite("shape", &vision::SegmentationResult::shape) - .def("__repr__", &vision::SegmentationResult::Str) - .def("__str__", &vision::SegmentationResult::Str); - - pybind11::class_(m, "FaceRecognitionResult") - .def(pybind11::init()) - .def_readwrite("embedding", &vision::FaceRecognitionResult::embedding) - .def("__repr__", &vision::FaceRecognitionResult::Str) - .def("__str__", &vision::FaceRecognitionResult::Str); - - pybind11::class_(m, "MattingResult") - .def(pybind11::init()) - .def_readwrite("alpha", &vision::MattingResult::alpha) - .def_readwrite("foreground", &vision::MattingResult::foreground) - .def_readwrite("shape", &vision::MattingResult::shape) - .def_readwrite("contain_foreground", &vision::MattingResult::shape) - .def("__repr__", &vision::MattingResult::Str) - .def("__str__", &vision::MattingResult::Str); - - BindPPCls(m); - BindPPDet(m); - BindPPSeg(m); - - BindDetection(m); - BindFaceDet(m); - BindFaceId(m); - BindMatting(m); -#ifdef ENABLE_VISION_VISUALIZE - BindVisualize(m); -#endif -} -} // namespace fastdeploy diff --git a/csrcs/fastdeploy/vision/visualize/detection.cc b/csrcs/fastdeploy/vision/visualize/detection.cc deleted file mode 100644 index 147ef6556..000000000 --- a/csrcs/fastdeploy/vision/visualize/detection.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE - -#include "fastdeploy/vision/visualize/visualize.h" -#include "opencv2/imgproc/imgproc.hpp" - -namespace fastdeploy { -namespace vision { - -// Default only support visualize num_classes <= 1000 -// If need to visualize num_classes > 1000 -// Please call Visualize::GetColorMap(num_classes) first -cv::Mat Visualize::VisDetection(const cv::Mat& im, - const DetectionResult& result, int line_size, - float font_size) { - auto color_map = GetColorMap(); - int h = im.rows; - int w = im.cols; - auto vis_im = im.clone(); - for (size_t i = 0; i < result.boxes.size(); ++i) { - cv::Rect rect(result.boxes[i][0], result.boxes[i][1], - result.boxes[i][2] - result.boxes[i][0], - result.boxes[i][3] - result.boxes[i][1]); - int c0 = color_map[3 * result.label_ids[i] + 0]; - int c1 = color_map[3 * result.label_ids[i] + 1]; - int c2 = color_map[3 * result.label_ids[i] + 2]; - cv::Scalar rect_color = cv::Scalar(c0, c1, c2); - std::string id = std::to_string(result.label_ids[i]); - std::string score = std::to_string(result.scores[i]); - if (score.size() > 4) { - score = score.substr(0, 4); - } - std::string text = id + "," + score; - int font = cv::FONT_HERSHEY_SIMPLEX; - cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); - cv::Point origin; - origin.x = rect.x; - origin.y = rect.y; - cv::Rect text_background = - cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height, - text_size.width, text_size.height); - cv::rectangle(vis_im, rect, rect_color, line_size); - cv::putText(vis_im, text, origin, font, font_size, - cv::Scalar(255, 255, 255), 1); - } - return vis_im; -} - -} // namespace vision -} // namespace fastdeploy -#endif diff --git a/csrcs/fastdeploy/vision/visualize/face_detection.cc b/csrcs/fastdeploy/vision/visualize/face_detection.cc deleted file mode 100644 index d9da27786..000000000 --- a/csrcs/fastdeploy/vision/visualize/face_detection.cc +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE - -#include "fastdeploy/vision/visualize/visualize.h" -#include "opencv2/imgproc/imgproc.hpp" - -namespace fastdeploy { - -namespace vision { - -// Default only support visualize num_classes <= 1000 -// If need to visualize num_classes > 1000 -// Please call Visualize::GetColorMap(num_classes) first -cv::Mat Visualize::VisFaceDetection(const cv::Mat& im, - const FaceDetectionResult& result, - int line_size, float font_size) { - auto color_map = GetColorMap(); - int h = im.rows; - int w = im.cols; - - auto vis_im = im.clone(); - bool vis_landmarks = false; - if ((result.landmarks_per_face > 0) && - (result.boxes.size() * result.landmarks_per_face == - result.landmarks.size())) { - vis_landmarks = true; - } - for (size_t i = 0; i < result.boxes.size(); ++i) { - cv::Rect rect(result.boxes[i][0], result.boxes[i][1], - result.boxes[i][2] - result.boxes[i][0], - result.boxes[i][3] - result.boxes[i][1]); - int color_id = i % 333; - int c0 = color_map[3 * color_id + 0]; - int c1 = color_map[3 * color_id + 1]; - int c2 = color_map[3 * color_id + 2]; - cv::Scalar rect_color = cv::Scalar(c0, c1, c2); - std::string text = std::to_string(result.scores[i]); - if (text.size() > 4) { - text = text.substr(0, 4); - } - int font = cv::FONT_HERSHEY_SIMPLEX; - cv::Size text_size = cv::getTextSize(text, font, font_size, 1, nullptr); - cv::Point origin; - origin.x = rect.x; - origin.y = rect.y; - cv::Rect text_background = - cv::Rect(result.boxes[i][0], result.boxes[i][1] - text_size.height, - text_size.width, text_size.height); - cv::rectangle(vis_im, rect, rect_color, line_size); - cv::putText(vis_im, text, origin, font, font_size, - cv::Scalar(255, 255, 255), 1); - // vis landmarks (if have) - if (vis_landmarks) { - cv::Scalar landmark_color = rect_color; - for (size_t j = 0; j < result.landmarks_per_face; ++j) { - cv::Point landmark; - landmark.x = static_cast( - result.landmarks[i * result.landmarks_per_face + j][0]); - landmark.y = static_cast( - result.landmarks[i * result.landmarks_per_face + j][1]); - cv::circle(vis_im, landmark, line_size, landmark_color, -1); - } - } - } - return vis_im; -} - -} // namespace vision -} // namespace fastdeploy - -#endif diff --git a/csrcs/fastdeploy/vision/visualize/matting_alpha.cc b/csrcs/fastdeploy/vision/visualize/matting_alpha.cc deleted file mode 100644 index 1018018c6..000000000 --- a/csrcs/fastdeploy/vision/visualize/matting_alpha.cc +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE - -#include "fastdeploy/vision/visualize/visualize.h" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc/imgproc.hpp" - -namespace fastdeploy { -namespace vision { - -static void RemoveSmallConnectedArea(cv::Mat* alpha_pred, - float threshold = 0.05f) { - // 移除小的联通区域和噪点 开闭合形态学处理 - // 假设输入的是透明度alpha, 值域(0.,1.) - cv::Mat gray, binary; - (*alpha_pred).convertTo(gray, CV_8UC1, 255.f); - // 255 * 0.05 ~ 13 - unsigned int binary_threshold = static_cast(255.f * threshold); - cv::threshold(gray, binary, binary_threshold, 255, cv::THRESH_BINARY); - // morphologyEx with OPEN operation to remove noise first. - auto kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(3, 3), - cv::Point(-1, -1)); - cv::morphologyEx(binary, binary, cv::MORPH_OPEN, kernel); - // Computationally connected domain - cv::Mat labels = cv::Mat::zeros((*alpha_pred).size(), CV_32S); - cv::Mat stats, centroids; - int num_labels = - cv::connectedComponentsWithStats(binary, labels, stats, centroids, 8, 4); - if (num_labels <= 1) { - // no noise, skip. - return; - } - // find max connected area, 0 is background - int max_connected_id = 1; // 1,2,... - int max_connected_area = stats.at(max_connected_id, cv::CC_STAT_AREA); - for (int i = 1; i < num_labels; ++i) { - int tmp_connected_area = stats.at(i, cv::CC_STAT_AREA); - if (tmp_connected_area > max_connected_area) { - max_connected_area = tmp_connected_area; - max_connected_id = i; - } - } - const int h = (*alpha_pred).rows; - const int w = (*alpha_pred).cols; - // remove small connected area. - for (int i = 0; i < h; ++i) { - int* label_row_ptr = labels.ptr(i); - float* alpha_row_ptr = (*alpha_pred).ptr(i); - for (int j = 0; j < w; ++j) { - if (label_row_ptr[j] != max_connected_id) alpha_row_ptr[j] = 0.f; - } - } -} - -cv::Mat Visualize::VisMattingAlpha(const cv::Mat& im, - const MattingResult& result, - bool remove_small_connected_area) { - // 只可视化alpha,fgr(前景)本身就是一张图 不需要可视化 - FDASSERT((!im.empty()), "im can't be empty!"); - FDASSERT((im.channels() == 3), "Only support 3 channels mat!"); - - auto vis_img = im.clone(); - int out_h = static_cast(result.shape[0]); - int out_w = static_cast(result.shape[1]); - int height = im.rows; - int width = im.cols; - // alpha to cv::Mat && 避免resize等操作修改外部数据 - std::vector alpha_copy; - alpha_copy.assign(result.alpha.begin(), result.alpha.end()); - float* alpha_ptr = static_cast(alpha_copy.data()); - cv::Mat alpha(out_h, out_w, CV_32FC1, alpha_ptr); - if (remove_small_connected_area) { - RemoveSmallConnectedArea(&alpha, 0.05f); - } - if ((out_h != height) || (out_w != width)) { - cv::resize(alpha, alpha, cv::Size(width, height)); - } - - if ((vis_img).type() != CV_8UC3) { - (vis_img).convertTo((vis_img), CV_8UC3); - } - - uchar* vis_data = static_cast(vis_img.data); - uchar* im_data = static_cast(im.data); - float* alpha_data = reinterpret_cast(alpha.data); - - for (size_t i = 0; i < height; ++i) { - for (size_t j = 0; j < width; ++j) { - float alpha_val = alpha_data[i * width + j]; - vis_data[i * width * 3 + j * 3 + 0] = cv::saturate_cast( - static_cast(im_data[i * width * 3 + j * 3 + 0]) * alpha_val + - (1.f - alpha_val) * 153.f); - vis_data[i * width * 3 + j * 3 + 1] = cv::saturate_cast( - static_cast(im_data[i * width * 3 + j * 3 + 1]) * alpha_val + - (1.f - alpha_val) * 255.f); - vis_data[i * width * 3 + j * 3 + 2] = cv::saturate_cast( - static_cast(im_data[i * width * 3 + j * 3 + 2]) * alpha_val + - (1.f - alpha_val) * 120.f); - } - } - return vis_img; -} - -} // namespace vision -} // namespace fastdeploy -#endif diff --git a/csrcs/fastdeploy/vision/visualize/segmentation.cc b/csrcs/fastdeploy/vision/visualize/segmentation.cc deleted file mode 100644 index 7d3790328..000000000 --- a/csrcs/fastdeploy/vision/visualize/segmentation.cc +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE - -#include "fastdeploy/vision/visualize/visualize.h" -#include "opencv2/highgui.hpp" -#include "opencv2/imgproc/imgproc.hpp" - -namespace fastdeploy { -namespace vision { - -cv::Mat Visualize::VisSegmentation(const cv::Mat& im, - const SegmentationResult& result) { - auto color_map = GetColorMap(); - int64_t height = result.shape[0]; - int64_t width = result.shape[1]; - auto vis_img = cv::Mat(height, width, CV_8UC3); - - int64_t index = 0; - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - int category_id = result.label_map[index++]; - vis_img.at(i, j)[0] = color_map[3 * category_id + 0]; - vis_img.at(i, j)[1] = color_map[3 * category_id + 1]; - vis_img.at(i, j)[2] = color_map[3 * category_id + 2]; - } - } - cv::addWeighted(im, .5, vis_img, .5, 0, vis_img); - return vis_img; -} - -} // namespace vision -} // namespace fastdeploy -#endif diff --git a/csrcs/fastdeploy/vision/visualize/visualize.cc b/csrcs/fastdeploy/vision/visualize/visualize.cc deleted file mode 100644 index 4ad6ba124..000000000 --- a/csrcs/fastdeploy/vision/visualize/visualize.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE -#include "fastdeploy/vision/visualize/visualize.h" - -namespace fastdeploy { -namespace vision { - -int Visualize::num_classes_ = 0; -std::vector Visualize::color_map_ = std::vector(); - -const std::vector& Visualize::GetColorMap(int num_classes) { - if (num_classes < num_classes_) { - return color_map_; - } - num_classes_ = num_classes; - std::vector().swap(color_map_); - color_map_.resize(3 * num_classes_, 0); - for (int i = 0; i < num_classes_; ++i) { - int j = 0; - int lab = i; - while (lab) { - color_map_[i * 3] |= (((lab >> 0) & 1) << (7 - j)); - color_map_[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); - color_map_[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); - ++j; - lab >>= 3; - } - } - return color_map_; -} - -} // namespace vision -} // namespace fastdeploy -#endif diff --git a/csrcs/fastdeploy/vision/visualize/visualize.h b/csrcs/fastdeploy/vision/visualize/visualize.h deleted file mode 100644 index bee62c301..000000000 --- a/csrcs/fastdeploy/vision/visualize/visualize.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifdef ENABLE_VISION_VISUALIZE -#pragma once - -#include "fastdeploy/vision/common/result.h" -#include "opencv2/imgproc/imgproc.hpp" -namespace fastdeploy { -namespace vision { - -class FASTDEPLOY_DECL Visualize { - public: - static int num_classes_; - static std::vector color_map_; - static const std::vector& GetColorMap(int num_classes = 1000); - static cv::Mat VisDetection(const cv::Mat& im, const DetectionResult& result, - int line_size = 1, float font_size = 0.5f); - static cv::Mat VisFaceDetection(const cv::Mat& im, - const FaceDetectionResult& result, - int line_size = 1, float font_size = 0.5f); - static cv::Mat VisSegmentation(const cv::Mat& im, - const SegmentationResult& result); - static cv::Mat VisMattingAlpha(const cv::Mat& im, const MattingResult& result, - bool remove_small_connected_area = false); -}; - -} // namespace vision -} // namespace fastdeploy -#endif diff --git a/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc b/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc deleted file mode 100644 index 36010acf1..000000000 --- a/csrcs/fastdeploy/vision/visualize/visualize_pybind.cc +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/pybind/main.h" - -namespace fastdeploy { -void BindVisualize(pybind11::module& m) { - pybind11::class_(m, "Visualize") - .def(pybind11::init<>()) - .def_static("vis_detection", - [](pybind11::array& im_data, vision::DetectionResult& result, - int line_size, float font_size) { - auto im = PyArrayToCvMat(im_data); - auto vis_im = vision::Visualize::VisDetection( - im, result, line_size, font_size); - FDTensor out; - vision::Mat(vis_im).ShareWithTensor(&out); - return TensorToPyArray(out); - }) - .def_static( - "vis_face_detection", - [](pybind11::array& im_data, vision::FaceDetectionResult& result, - int line_size, float font_size) { - auto im = PyArrayToCvMat(im_data); - auto vis_im = vision::Visualize::VisFaceDetection( - im, result, line_size, font_size); - FDTensor out; - vision::Mat(vis_im).ShareWithTensor(&out); - return TensorToPyArray(out); - }) - .def_static( - "vis_segmentation", - [](pybind11::array& im_data, vision::SegmentationResult& result) { - cv::Mat im = PyArrayToCvMat(im_data); - auto vis_im = vision::Visualize::VisSegmentation(im, result); - FDTensor out; - vision::Mat(vis_im).ShareWithTensor(&out); - return TensorToPyArray(out); - }) - .def_static("vis_matting_alpha", - [](pybind11::array& im_data, vision::MattingResult& result, - bool remove_small_connected_area) { - cv::Mat im = PyArrayToCvMat(im_data); - auto vis_im = vision::Visualize::VisMattingAlpha( - im, result, remove_small_connected_area); - FDTensor out; - vision::Mat(vis_im).ShareWithTensor(&out); - return TensorToPyArray(out); - }); -} -} // namespace fastdeploy diff --git a/examples/.gitignore b/examples/.gitignore deleted file mode 100644 index 0c684c6ae..000000000 --- a/examples/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -*.jpg -*.png -*.jpeg -*.onnx -*.engine -*.pd* -*.nb -bin \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt deleted file mode 100644 index 770bf44da..000000000 --- a/examples/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -function(add_fastdeploy_executable FIELD CC_FILE) - # temp target name/file var in function scope - set(TEMP_TARGET_FILE ${CC_FILE}) - string(REGEX MATCHALL "[0-9A-Za-z_]*.cc" FILE_NAME ${CC_FILE}) - string(REGEX REPLACE ".cc" "" FILE_PREFIX ${FILE_NAME}) - set(TEMP_TARGET_NAME ${FIELD}_${FILE_PREFIX}) - if (EXISTS ${TEMP_TARGET_FILE} AND TARGET fastdeploy) - add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE}) - target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy) - message(STATUS " Added FastDeploy Executable : ${TEMP_TARGET_NAME}") - endif() - unset(TEMP_TARGET_FILE) - unset(TEMP_TARGET_NAME) -endfunction() - -# vision examples -if(WITH_VISION_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/vision) - message(STATUS "") - message(STATUS "*************FastDeploy Examples Summary**********") - file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/vision/*.cc) - foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS}) - add_fastdeploy_executable(vision ${_CC_FILE}) - endforeach() -endif() - -# text examples -if(WITH_TEXT_EXAMPLES AND EXISTS ${PROJECT_SOURCE_DIR}/examples/text) - message(STATUS "") - message(STATUS "*************FastDeploy Examples Summary**********") - file(GLOB ALL_VISION_EXAMPLE_SRCS ${PROJECT_SOURCE_DIR}/examples/text/*.cc) - foreach(_CC_FILE ${ALL_VISION_EXAMPLE_SRCS}) - add_fastdeploy_executable(text ${_CC_FILE}) - endforeach() -endif() - -# other examples ... diff --git a/examples/resources/.gitignore b/examples/resources/.gitignore deleted file mode 100644 index aadf70252..000000000 --- a/examples/resources/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -images/*.jpg -images/*.jpeg -images/*.png -models/*.onnx -models/*.pd* -models/*.engine -models/*.trt -models/*.nb -models/*param* -models/*model* -outputs/*.jpg -outputs/*.jpeg -outputs/*.png -outputs/*.txt -outputs/*.json \ No newline at end of file diff --git a/examples/resources/images/.gitignore b/examples/resources/images/.gitignore deleted file mode 100644 index a025c1b2f..000000000 --- a/examples/resources/images/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.jpg -*.jpeg -*.png \ No newline at end of file diff --git a/examples/resources/models/.gitignore b/examples/resources/models/.gitignore deleted file mode 100644 index 8a3992492..000000000 --- a/examples/resources/models/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.onnx -*.engine -*.pd* -*.nb -*.trt \ No newline at end of file diff --git a/examples/resources/outputs/.gitignore b/examples/resources/outputs/.gitignore deleted file mode 100644 index b90600fbe..000000000 --- a/examples/resources/outputs/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.jpg -*.png -*.jpeg \ No newline at end of file diff --git a/examples/text/ernie_tokencls.cc b/examples/text/ernie_tokencls.cc deleted file mode 100644 index 1f04bbb66..000000000 --- a/examples/text/ernie_tokencls.cc +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include -#include - -#include "fastdeploy/text.h" -#include "tokenizers/ernie_faster_tokenizer.h" - -using namespace paddlenlp; - -void LoadTransitionFromFile(const std::string& file, - std::vector* transitions, int* num_tags) { - std::ifstream fin(file); - std::string curr_transition; - float transition; - int i = 0; - while (fin) { - std::getline(fin, curr_transition); - std::istringstream iss(curr_transition); - while (iss) { - iss >> transition; - transitions->push_back(transition); - } - if (curr_transition != "") { - ++i; - } - } - *num_tags = i; -} - -// Only useful for axis = -1 -template -void Softmax(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) { - auto softmax_func = [](const T* score_vec, T* softmax_vec, int label_num) { - double score_max = *(std::max_element(score_vec, score_vec + label_num)); - double e_sum = 0; - for (int j = 0; j < label_num; j++) { - softmax_vec[j] = std::exp(score_vec[j] - score_max); - e_sum += softmax_vec[j]; - } - for (int k = 0; k < label_num; k++) { - softmax_vec[k] /= e_sum; - } - }; - - output->Allocate(input.shape, input.dtype); - int label_num = output->shape.back(); - int batch_size = input.Numel() / label_num; - int offset = 0; - const T* input_ptr = reinterpret_cast(input.Data()); - T* output_ptr = reinterpret_cast(output->Data()); - for (int i = 0; i < batch_size; ++i) { - softmax_func(input_ptr + offset, output_ptr + offset, label_num); - offset += label_num; - } -} - -// Only useful for axis = -1 -template -void Max(const fastdeploy::FDTensor& input, fastdeploy::FDTensor* output) { - auto output_shape = input.shape; - output_shape.back() = 1; - output->Allocate(output_shape, input.dtype); - int batch_size = output->Numel(); - int label_num = input.shape.back(); - int offset = 0; - const T* input_ptr = reinterpret_cast(input.Data()); - T* output_ptr = reinterpret_cast(output->Data()); - for (int i = 0; i < batch_size; ++i) { - output_ptr[i] = - *(std::max_element(input_ptr + offset, input_ptr + offset + label_num)); - offset += label_num; - } -} - -template -void ViterbiDecode(const fastdeploy::FDTensor& slot_logits, - const fastdeploy::FDTensor& trans, - fastdeploy::FDTensor* best_path) { - int batch_size = slot_logits.shape[0]; - int seq_len = slot_logits.shape[1]; - int num_tags = slot_logits.shape[2]; - best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64); - - const T* slot_logits_ptr = reinterpret_cast(slot_logits.Data()); - const T* trans_ptr = reinterpret_cast(trans.Data()); - int64_t* best_path_ptr = reinterpret_cast(best_path->Data()); - std::vector scores(num_tags); - std::copy(slot_logits_ptr, slot_logits_ptr + num_tags, scores.begin()); - std::vector> M(num_tags, std::vector(num_tags)); - for (int b = 0; b < batch_size; ++b) { - std::vector> paths; - const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags; - int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len; - for (int t = 1; t < seq_len; t++) { - for (size_t i = 0; i < num_tags; i++) { - for (size_t j = 0; j < num_tags; j++) { - auto trans_idx = i * num_tags * num_tags + j * num_tags; - auto slot_logit_idx = t * num_tags + j; - M[i][j] = scores[i] + trans_ptr[trans_idx] + - curr_slot_logits_ptr[slot_logit_idx]; - } - } - std::vector idxs; - for (size_t i = 0; i < num_tags; i++) { - T max = 0.0f; - int idx = 0; - for (size_t j = 0; j < num_tags; j++) { - if (M[j][i] > max) { - max = M[j][i]; - idx = j; - } - } - scores[i] = max; - idxs.push_back(idx); - } - paths.push_back(idxs); - } - int scores_max_index = 0; - float scores_max = 0.0f; - for (size_t i = 0; i < scores.size(); i++) { - if (scores[i] > scores_max) { - scores_max = scores[i]; - scores_max_index = i; - } - } - curr_best_path_ptr[seq_len - 1] = scores_max_index; - for (int i = seq_len - 2; i >= 0; i--) { - int index = curr_best_path_ptr[i + 1]; - curr_best_path_ptr[i] = paths[i][index]; - } - } -} - -int main() { - // 1. Define a ernie faster tokenizer - faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer( - "ernie_vocab.txt"); - std::vector strings_list = { - "导航去科技园二号楼", "屏幕亮度为我减小一点吧"}; - std::vector encodings; - tokenizer.EncodeBatchStrings(strings_list, &encodings); - size_t batch_size = strings_list.size(); - size_t seq_len = encodings[0].GetLen(); - for (auto&& encoding : encodings) { - std::cout << encoding.DebugString() << std::endl; - } - // 2. Initialize runtime - fastdeploy::RuntimeOption runtime_option; - runtime_option.SetModelPath("nano_static/model.pdmodel", - "nano_static/model.pdiparams"); - fastdeploy::Runtime runtime; - runtime.Init(runtime_option); - - // 3. Construct input vector - // 3.1 Convert encodings to input_ids, token_type_ids - std::vector input_ids, token_type_ids; - for (int i = 0; i < encodings.size(); ++i) { - auto&& curr_input_ids = encodings[i].GetIds(); - auto&& curr_type_ids = encodings[i].GetTypeIds(); - input_ids.insert(input_ids.end(), curr_input_ids.begin(), - curr_input_ids.end()); - token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(), - curr_type_ids.end()); - } - // 3.2 Set data to input vector - std::vector inputs(runtime.NumInputs()); - void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()}; - for (int i = 0; i < runtime.NumInputs(); ++i) { - inputs[i].SetExternalData({batch_size, seq_len}, - fastdeploy::FDDataType::INT64, inputs_ptrs[i]); - inputs[i].name = runtime.GetInputInfo(i).name; - } - - // 4. Infer - std::vector outputs(runtime.NumOutputs()); - runtime.Infer(inputs, &outputs); - - // 5. Postprocess - fastdeploy::FDTensor domain_probs, intent_probs; - Softmax(outputs[0], &domain_probs); - Softmax(outputs[1], &intent_probs); - - fastdeploy::FDTensor domain_max_probs, intent_max_probs; - Max(domain_probs, &domain_max_probs); - Max(intent_probs, &intent_max_probs); - - std::vector transition; - int num_tags; - LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags); - fastdeploy::FDTensor trans; - trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32, - transition.data()); - - fastdeploy::FDTensor best_path; - ViterbiDecode(outputs[2], trans, &best_path); - // 6. Print result - domain_max_probs.PrintInfo(); - intent_max_probs.PrintInfo(); - - batch_size = best_path.shape[0]; - seq_len = best_path.shape[1]; - const int64_t* best_path_ptr = - reinterpret_cast(best_path.Data()); - for (int i = 0; i < batch_size; ++i) { - std::cout << "best_path[" << i << "] = "; - for (int j = 0; j < seq_len; ++j) { - std::cout << best_path_ptr[i * seq_len + j] << ", "; - } - std::cout << std::endl; - } - best_path.PrintInfo(); - return 0; -} diff --git a/examples/vision/biubug6_retinaface.cc b/examples/vision/biubug6_retinaface.cc deleted file mode 100644 index 65a396ff9..000000000 --- a/examples/vision/biubug6_retinaface.cc +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = - "../resources/models/Pytorch_RetinaFace_resnet50-720-1080.onnx"; - std::string img_path = "../resources/images/test_face_det.jpg"; - std::string vis_path = - "../resources/outputs/biubug6_retinaface_vis_result.jpg"; - - auto model = vis::biubug6::RetinaFace(model_file); - model.size = {1080, 720}; // (width, height) - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.3f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/deepcam_yolov5face.cc b/examples/vision/deepcam_yolov5face.cc deleted file mode 100644 index c6e0083e0..000000000 --- a/examples/vision/deepcam_yolov5face.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolov5s-face.onnx"; - std::string img_path = "../resources/images/test_face_det.jpg"; - std::string vis_path = - "../resources/outputs/deepcam_yolov5face_vis_result.jpg"; - - auto model = vis::deepcam::YOLOv5Face(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.1f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/deepinsight_arcface.cc b/examples/vision/deepinsight_arcface.cc deleted file mode 100644 index ff5626a32..000000000 --- a/examples/vision/deepinsight_arcface.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" - -int main() { - namespace vis = fastdeploy::vision; - // 0,1 同一个人, 0,2 不同的人 - std::string model_file = "../resources/models/ms1mv3_arcface_r100.onnx"; - std::string face0_path = "../resources/images/face_recognition_0.png"; - std::string face1_path = "../resources/images/face_recognition_1.png"; - std::string face2_path = "../resources/images/face_recognition_2.png"; - - auto model = vis::deepinsight::ArcFace(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - // 设置输出l2 normalize后的embedding - model.l2_normalize = true; - - cv::Mat face0 = cv::imread(face0_path); - cv::Mat face1 = cv::imread(face1_path); - cv::Mat face2 = cv::imread(face2_path); - - vis::FaceRecognitionResult res0; - vis::FaceRecognitionResult res1; - vis::FaceRecognitionResult res2; - if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) || - (!model.Predict(&face2, &res2))) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测框结果 - std::cout << "--- [Face 0]:" << res0.Str(); - std::cout << "--- [Face 1]:" << res1.Str(); - std::cout << "--- [Face 2]:" << res2.Str(); - - // 计算余弦相似度 - float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding, - model.l2_normalize); - float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding, - model.l2_normalize); - std::cout << "Detect Done! Cosine 01: " << cosine01 - << ", Cosine 02:" << cosine02 << std::endl; - return 0; -} diff --git a/examples/vision/deepinsight_cosface.cc b/examples/vision/deepinsight_cosface.cc deleted file mode 100644 index 7787537ae..000000000 --- a/examples/vision/deepinsight_cosface.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" - -int main() { - namespace vis = fastdeploy::vision; - // 0,1 同一个人, 0,2 不同的人 - std::string model_file = "../resources/models/glint360k_cosface_r100.onnx"; - std::string face0_path = "../resources/images/face_recognition_0.png"; - std::string face1_path = "../resources/images/face_recognition_1.png"; - std::string face2_path = "../resources/images/face_recognition_2.png"; - - auto model = vis::deepinsight::CosFace(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - // 设置输出l2 normalize后的embedding - model.l2_normalize = true; - - cv::Mat face0 = cv::imread(face0_path); - cv::Mat face1 = cv::imread(face1_path); - cv::Mat face2 = cv::imread(face2_path); - - vis::FaceRecognitionResult res0; - vis::FaceRecognitionResult res1; - vis::FaceRecognitionResult res2; - if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) || - (!model.Predict(&face2, &res2))) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测框结果 - std::cout << "--- [Face 0]:" << res0.Str(); - std::cout << "--- [Face 1]:" << res1.Str(); - std::cout << "--- [Face 2]:" << res2.Str(); - - // 计算余弦相似度 - float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding, - model.l2_normalize); - float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding, - model.l2_normalize); - std::cout << "Detect Done! Cosine 01: " << cosine01 - << ", Cosine 02:" << cosine02 << std::endl; - return 0; -} diff --git a/examples/vision/deepinsight_insightface_rec.cc b/examples/vision/deepinsight_insightface_rec.cc deleted file mode 100644 index b2579bad8..000000000 --- a/examples/vision/deepinsight_insightface_rec.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" - -int main() { - namespace vis = fastdeploy::vision; - // 0,1 同一个人, 0,2 不同的人 - std::string model_file = "../resources/models/ms1mv3_arcface_r100.onnx"; - std::string face0_path = "../resources/images/face_recognition_0.png"; - std::string face1_path = "../resources/images/face_recognition_1.png"; - std::string face2_path = "../resources/images/face_recognition_2.png"; - - auto model = vis::deepinsight::InsightFaceRecognitionModel(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - // 设置输出l2 normalize后的embedding - model.l2_normalize = true; - - cv::Mat face0 = cv::imread(face0_path); - cv::Mat face1 = cv::imread(face1_path); - cv::Mat face2 = cv::imread(face2_path); - - vis::FaceRecognitionResult res0; - vis::FaceRecognitionResult res1; - vis::FaceRecognitionResult res2; - if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) || - (!model.Predict(&face2, &res2))) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测框结果 - std::cout << "--- [Face 0]:" << res0.Str(); - std::cout << "--- [Face 1]:" << res1.Str(); - std::cout << "--- [Face 2]:" << res2.Str(); - - // 计算余弦相似度 - float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding, - model.l2_normalize); - float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding, - model.l2_normalize); - std::cout << "Detect Done! Cosine 01: " << cosine01 - << ", Cosine 02:" << cosine02 << std::endl; - return 0; -} diff --git a/examples/vision/deepinsight_partial_fc.cc b/examples/vision/deepinsight_partial_fc.cc deleted file mode 100644 index 85ec01799..000000000 --- a/examples/vision/deepinsight_partial_fc.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" - -int main() { - namespace vis = fastdeploy::vision; - // 0,1 同一个人, 0,2 不同的人 - std::string model_file = "../resources/models/partial_fc_glint360k_r100.onnx"; - std::string face0_path = "../resources/images/face_recognition_0.png"; - std::string face1_path = "../resources/images/face_recognition_1.png"; - std::string face2_path = "../resources/images/face_recognition_2.png"; - - auto model = vis::deepinsight::PartialFC(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - // 设置输出l2 normalize后的embedding - model.l2_normalize = true; - - cv::Mat face0 = cv::imread(face0_path); - cv::Mat face1 = cv::imread(face1_path); - cv::Mat face2 = cv::imread(face2_path); - - vis::FaceRecognitionResult res0; - vis::FaceRecognitionResult res1; - vis::FaceRecognitionResult res2; - if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) || - (!model.Predict(&face2, &res2))) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测框结果 - std::cout << "--- [Face 0]:" << res0.Str(); - std::cout << "--- [Face 1]:" << res1.Str(); - std::cout << "--- [Face 2]:" << res2.Str(); - - // 计算余弦相似度 - float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding, - model.l2_normalize); - float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding, - model.l2_normalize); - std::cout << "Detect Done! Cosine 01: " << cosine01 - << ", Cosine 02:" << cosine02 << std::endl; - return 0; -} diff --git a/examples/vision/deepinsight_scrfd.cc b/examples/vision/deepinsight_scrfd.cc deleted file mode 100644 index 0ff68db93..000000000 --- a/examples/vision/deepinsight_scrfd.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/SCRFD.onnx"; - std::string img_path = "../resources/images/test_face_det.jpg"; - std::string vis_path = "../resources/outputs/deepsight_scrfd_vis_result.jpg"; - - auto model = vis::deepinsight::SCRFD(model_file); - model.size = {640, 640}; // (width, height) - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.3f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/deepinsight_vpl.cc b/examples/vision/deepinsight_vpl.cc deleted file mode 100644 index 8326d3569..000000000 --- a/examples/vision/deepinsight_vpl.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" - -int main() { - namespace vis = fastdeploy::vision; - // 0,1 同一个人, 0,2 不同的人 - std::string model_file = "../resources/models/ms1mv3_r100_lr01.onnx"; - std::string face0_path = "../resources/images/face_recognition_0.png"; - std::string face1_path = "../resources/images/face_recognition_1.png"; - std::string face2_path = "../resources/images/face_recognition_2.png"; - - auto model = vis::deepinsight::VPL(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - // 设置输出l2 normalize后的embedding - model.l2_normalize = true; - - cv::Mat face0 = cv::imread(face0_path); - cv::Mat face1 = cv::imread(face1_path); - cv::Mat face2 = cv::imread(face2_path); - - vis::FaceRecognitionResult res0; - vis::FaceRecognitionResult res1; - vis::FaceRecognitionResult res2; - if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) || - (!model.Predict(&face2, &res2))) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测框结果 - std::cout << "--- [Face 0]:" << res0.Str(); - std::cout << "--- [Face 1]:" << res1.Str(); - std::cout << "--- [Face 2]:" << res2.Str(); - - // 计算余弦相似度 - float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding, - model.l2_normalize); - float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding, - model.l2_normalize); - std::cout << "Detect Done! Cosine 01: " << cosine01 - << ", Cosine 02:" << cosine02 << std::endl; - return 0; -} diff --git a/new_examples/vision/detection/README.md b/examples/vision/detection/README.md similarity index 100% rename from new_examples/vision/detection/README.md rename to examples/vision/detection/README.md diff --git a/new_examples/vision/detection/yolov7/README.md b/examples/vision/detection/yolov7/README.md similarity index 100% rename from new_examples/vision/detection/yolov7/README.md rename to examples/vision/detection/yolov7/README.md diff --git a/new_examples/vision/detection/yolov7/cpp/CMakeLists.txt b/examples/vision/detection/yolov7/cpp/CMakeLists.txt similarity index 100% rename from new_examples/vision/detection/yolov7/cpp/CMakeLists.txt rename to examples/vision/detection/yolov7/cpp/CMakeLists.txt diff --git a/new_examples/vision/detection/yolov7/cpp/README.md b/examples/vision/detection/yolov7/cpp/README.md similarity index 100% rename from new_examples/vision/detection/yolov7/cpp/README.md rename to examples/vision/detection/yolov7/cpp/README.md diff --git a/new_examples/vision/detection/yolov7/cpp/infer.cc b/examples/vision/detection/yolov7/cpp/infer.cc similarity index 100% rename from new_examples/vision/detection/yolov7/cpp/infer.cc rename to examples/vision/detection/yolov7/cpp/infer.cc diff --git a/new_examples/vision/detection/yolov7/python/README.md b/examples/vision/detection/yolov7/python/README.md similarity index 100% rename from new_examples/vision/detection/yolov7/python/README.md rename to examples/vision/detection/yolov7/python/README.md diff --git a/new_examples/vision/detection/yolov7/python/infer.py b/examples/vision/detection/yolov7/python/infer.py similarity index 100% rename from new_examples/vision/detection/yolov7/python/infer.py rename to examples/vision/detection/yolov7/python/infer.py diff --git a/examples/vision/linzaer_ultraface.cc b/examples/vision/linzaer_ultraface.cc deleted file mode 100644 index eb1cbafe8..000000000 --- a/examples/vision/linzaer_ultraface.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/version-RFB-320.onnx"; - std::string img_path = "../resources/images/test_face_det_0.jpg"; - std::string vis_path = - "../resources/outputs/linzaer_ultraface_vis_result.jpg"; - - auto model = vis::linzaer::UltraFace(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.7f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/megvii_yolox.cc b/examples/vision/megvii_yolox.cc deleted file mode 100644 index 340694b54..000000000 --- a/examples/vision/megvii_yolox.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolox_s.onnx"; - std::string img_path = "../resources/images/bus.jpg"; - std::string vis_path = "../resources/outputs/megvii_yolox_vis_result.jpg"; - - auto model = vis::megvii::YOLOX(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/meituan_yolov6.cc b/examples/vision/meituan_yolov6.cc deleted file mode 100644 index 7bdd78e5d..000000000 --- a/examples/vision/meituan_yolov6.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolov6s.onnx"; - std::string img_path = "../resources/images/bus.jpg"; - std::string vis_path = "../resources/outputs/meituan_yolov6_vis_result.jpg"; - - auto model = vis::meituan::YOLOv6(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/ppdet_ppyoloe.cc b/examples/vision/ppdet_ppyoloe.cc deleted file mode 100644 index b234021c9..000000000 --- a/examples/vision/ppdet_ppyoloe.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "ppyoloe_crn_l_300e_coco/model.pdmodel"; - std::string params_file = "ppyoloe_crn_l_300e_coco/model.pdiparams"; - std::string config_file = "ppyoloe_crn_l_300e_coco/infer_cfg.yml"; - std::string img_path = "test.jpeg"; - std::string vis_path = "vis.jpeg"; - - auto model = vis::ppdet::PPYOLOE(model_file, params_file, config_file); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/ppogg_yolov5lite.cc b/examples/vision/ppogg_yolov5lite.cc deleted file mode 100644 index 577543b37..000000000 --- a/examples/vision/ppogg_yolov5lite.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolov5lite.onnx"; - std::string img_path = "../resources/images/test.jpg"; - std::string vis_path = "../resources/outputs/ppogg_yolov5lite_vis_result.jpg"; - - auto model = vis::ppogg::YOLOv5Lite(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/ppseg_unet.cc b/examples/vision/ppseg_unet.cc deleted file mode 100644 index cb33611ad..000000000 --- a/examples/vision/ppseg_unet.cc +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "yaml-cpp/yaml.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/unet_Cityscapes/model.pdmodel"; - std::string params_file = - "../resources/models/unet_Cityscapes/model.pdiparams"; - std::string config_file = "../resources/models/unet_Cityscapes/deploy.yaml"; - std::string img_path = "../resources/images/cityscapes_demo.png"; - std::string vis_path = "../resources/outputs/vis.jpeg"; - - auto model = vis::ppseg::Model(model_file, params_file, config_file); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im; - - vis::SegmentationResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - YAML::Node cfg = YAML::LoadFile(config_file); - int num_classes = 19; - if (cfg["Deploy"]["num_classes"]) { - num_classes = cfg["Deploy"]["num_classes"].as(); - } - - // 可视化预测结果 - vis::Visualize::VisSegmentation(im, res, &vis_im, num_classes); - cv::imwrite(vis_path, vis_im); - std::cout << "Inference Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/rangilyu_nanodet_plus.cc b/examples/vision/rangilyu_nanodet_plus.cc deleted file mode 100644 index 91dcd604e..000000000 --- a/examples/vision/rangilyu_nanodet_plus.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/nanodet-plus-m_320.onnx"; - std::string img_path = "../resources/images/bus.jpg"; - std::string vis_path = - "../resources/outputs/rangilyu_nanodet_plus_vis_result.jpg"; - - auto model = vis::rangilyu::NanoDetPlus(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/ultralytics_yolov5.cc b/examples/vision/ultralytics_yolov5.cc deleted file mode 100644 index 42a233686..000000000 --- a/examples/vision/ultralytics_yolov5.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolov5s.onnx"; - std::string img_path = "../resources/images/bus.jpg"; - std::string vis_path = "../resources/outputs/ultralytics_yolov5_vis_result.jpg"; - - auto model = vis::ultralytics::YOLOv5(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/wongkinyiu_scaledyolov4.cc b/examples/vision/wongkinyiu_scaledyolov4.cc deleted file mode 100644 index 5374d3453..000000000 --- a/examples/vision/wongkinyiu_scaledyolov4.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/scaledyolov4.onnx"; - std::string img_path = "../resources/images/bus.jpg"; - std::string vis_path = "../resources/outputs/wongkinyiu_scaledyolov4_vis_result.jpg"; - - auto model = vis::wongkinyiu::ScaledYOLOv4(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/wongkinyiu_yolor.cc b/examples/vision/wongkinyiu_yolor.cc deleted file mode 100644 index abdca2b7f..000000000 --- a/examples/vision/wongkinyiu_yolor.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolor.onnx"; - std::string img_path = "../resources/images/horses.jpg"; - std::string vis_path = "../resources/outputs/wongkinyiu_yolor_vis_result.jpg"; - - auto model = vis::wongkinyiu::YOLOR(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/wongkinyiu_yolov7.cc b/examples/vision/wongkinyiu_yolov7.cc deleted file mode 100644 index 7de033cae..000000000 --- a/examples/vision/wongkinyiu_yolov7.cc +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "../resources/models/yolov7.onnx"; - std::string img_path = "../resources/images/horses.jpg"; - std::string vis_path = "../resources/outputs/wongkinyiu_yolov7_vis_result.jpg"; - - auto model = vis::wongkinyiu::YOLOv7(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread(img_path); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/examples/vision/zhkkke_modnet.cc b/examples/vision/zhkkke_modnet.cc deleted file mode 100644 index ee9f8df25..000000000 --- a/examples/vision/zhkkke_modnet.cc +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = - "../resources/models/modnet_photographic_portrait_matting.onnx"; - std::string img_path = "../resources/images/matting_1.jpg"; - std::string vis_path = "../resources/outputs/zhkkke_modnet_vis_result.jpg"; - - auto model = vis::zhkkke::MODNet(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - // 设置推理size, 必须和模型文件支持的 - model.size = {256, 256}; - - cv::Mat im = cv::imread(img_path); - cv::Mat im_old = im.clone(); - cv::Mat vis_im = im.clone(); - - vis::MattingResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - bool remove_small_connected_area = true; - vis::Visualize::VisMattingAlpha(im_old, res, &vis_im, - remove_small_connected_area); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/model_zoo/.gitignore b/model_zoo/.gitignore deleted file mode 100644 index e3919c57f..000000000 --- a/model_zoo/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -*.png -*.jpg -*.jpeg -*.onnx -*.zip -*.tar -*.pd* -*.engine -*.trt -*.nb -*.tgz -*.gz diff --git a/model_zoo/text/ernie-3.0/README.md b/model_zoo/text/ernie-3.0/README.md deleted file mode 100755 index c60148579..000000000 --- a/model_zoo/text/ernie-3.0/README.md +++ /dev/null @@ -1,238 +0,0 @@ -# ERNIE 3.0 Python部署指南 -本文介绍 ERNIE 3.0 Python 端的部署,包括部署环境的准备,序列标注和分类两大场景下的使用示例。 -- [ERNIE 3.0 Python 部署指南](#ERNIE3.0Python部署指南) - - [1. 环境准备](#1-环境准备) - - [1.1 CPU 端](#11-CPU端) - - [1.2 GPU 端](#12-GPU端) - - [2. 序列标注模型推理](#2-序列标注模型推理) - - [2.1 模型获取](#21-模型获取) - - [2.2 CPU 端推理样例](#22-CPU端推理样例) - - [2.3 GPU 端推理样例](#23-GPU端推理样例) - - [3. 分类模型推理](#3-分类模型推理) - - [3.1 模型获取](#31-模型获取) - - [3.2 CPU 端推理样例](#32-CPU端推理样例) - - [3.3 GPU 端推理样例](#33-GPU端推理样例) -## 1. 环境准备 -ERNIE 3.0 的部署分为 CPU 和 GPU 两种情况,请根据你的部署环境安装对应的依赖。 -### 1.1 CPU端 -CPU 端的部署请使用如下命令安装所需依赖 -``` -pip install -r requirements_cpu.txt -``` -### 1.2 GPU端 -为了在 GPU 上获得最佳的推理性能和稳定性,请先确保机器已正确安装 NVIDIA 相关驱动和基础软件,确保 CUDA >= 11.2,CuDNN >= 8.2,并使用以下命令安装所需依赖 -``` -pip install -r requirements_gpu.txt -``` -如需使用半精度(FP16)或量化(INT8)部署,请确保GPU设备的 CUDA 计算能力 (CUDA Compute Capability) 大于 7.0,典型的设备包括 V100、T4、A10、A100、GTX 20 系列和 30 系列显卡等。同时 INT8 推理需要安装 TensorRT 以及包含 TensorRT 预测库的 PaddlePaddle。 -更多关于 CUDA Compute Capability 和精度支持情况请参考 NVIDIA 文档:[GPU硬件与支持精度对照表](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/support-matrix/index.html#hardware-precision-matrix) - -1. TensorRT 安装请参考:[TensorRT安装说明](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-840-ea/install-guide/index.html#overview),Linux 端简要步骤如下: - - (1)下载 TensorRT8.2 版本,文件名 TensorRT-XXX.tar.gz,[下载链接](https://developer.nvidia.com/tensorrt) - - (2)解压得到 TensorRT-XXX 文件夹 - - (3)通过 export LD_LIBRARY_PATH=TensorRT-XXX/lib:$LD_LIBRARY_PATH 将 lib 路径加入到 LD_LIBRARY_PATH 中 - - (4)使用 pip install 安装 TensorRT-XXX/python 中对应的 TensorRT 安装包 - -2. PaddlePaddle 预测库的安装请参考 [PaddlePaddle 预测库安装文档](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/source_compile.html),Linux 端简要步骤如下: - - (1)根据 CUDA 环境和 Python 版本下载对应的 PaddlePaddle 预测库,注意须下载支持 TensorRT 的预测包,如 linux-cuda11.2-cudnn8.2-trt8-gcc8.2。[PaddlePaddle 预测库下载路径](https://www.paddlepaddle.org.cn/inference/v2.3/user_guides/download_lib.html#python) - - (2)使用 pip install 安装下载好的 PaddlePaddle 预测库 - - -## 2. 序列标注模型推理 -### 2.1 模型获取 -用户可使用自己训练的模型进行推理,具体训练调优方法可参考[模型训练调优](./../../README.md#微调),也可以使用我们提供的 msra_ner 数据集训练的 ERNIE 3.0 模型,请执行如下命令获取模型: -``` -# 获取序列标注FP32模型 -wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_pruned_infer_model.zip -unzip msra_ner_pruned_infer_model.zip -``` -### 2.2 CPU端推理样例 -在 CPU 端,请使用如下命令进行部署 -``` -python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 -``` -输出打印如下: -``` -input data: 北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。 -The model detects all entities: -entity: 北京 label: LOC pos: [0, 1] -entity: 重庆 label: LOC pos: [6, 7] -entity: 成都 label: LOC pos: [12, 13] ------------------------------ -input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。 -The model detects all entities: -entity: 乔丹 label: PER pos: [0, 1] -entity: 科比 label: PER pos: [3, 4] -entity: 詹姆斯 label: PER pos: [6, 8] -entity: 姚明 label: PER pos: [10, 11] ------------------------------ -``` -infer_cpu.py 脚本中的参数说明: -| 参数 |参数说明 | -|----------|--------------| -|--task_name | 配置任务名称,可选 seq_cls 或 token_cls,默认为 seq_cls| -|--model_name_or_path | 模型的路径或者名字,默认为 ernie-3.0-medium-zh| -|--model_path | 用于推理的 Paddle 模型的路径| -|--max_seq_length |最大序列长度,默认为 128| -|--precision_mode | 推理精度,可选 fp32,fp16 或者 int8,当输入非量化模型并设置 int8 时使用动态量化进行加速,默认 fp32 | -|--num_threads | 配置 cpu 的线程数,默认为 cpu 的最大线程数 | - -**Note**:在支持 avx512_vnni 指令集或 Intel® DL Boost 的 CPU 设备上,可设置 precision_mode 为 int8 对 FP32 模型进行动态量化以获得更高的推理性能,具体性能提升情况请查阅[量化性能提升情况](../../README.md#压缩效果)。 -CPU 端,开启动态量化的命令如下: -``` -python infer_cpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode int8 -``` -INT8 的输出打印和 FP32 的输出打印一致。 - -### 2.3 GPU端推理样例 -在 GPU 端,请使用如下命令进行部署 -``` -python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 -``` -输出打印如下: -``` -input data: 北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。 -The model detects all entities: -entity: 北京 label: LOC pos: [0, 1] -entity: 重庆 label: LOC pos: [6, 7] -entity: 成都 label: LOC pos: [12, 13] ------------------------------ -input data: 乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。 -The model detects all entities: -entity: 乔丹 label: PER pos: [0, 1] -entity: 科比 label: PER pos: [3, 4] -entity: 詹姆斯 label: PER pos: [6, 8] -entity: 姚明 label: PER pos: [10, 11] ------------------------------ -``` -如果需要 FP16 进行加速,可以设置 precision_mode 为 fp16,具体命令为 -``` -python infer_gpu.py --task_name token_cls --model_path ./msra_ner_pruned_infer_model/float32 --precision_mode fp16 -``` -如果需要进行 INT8 量化加速,还需要使用量化脚本对训练好的 FP32 模型进行量化,然后使用量化后的模型进行部署,模型的量化请参考:[模型量化脚本使用说明](./../../README.md#模型压缩),也可下载我们量化后的 INT8 模型进行部署,请执行如下命令获取模型: -``` -# 获取序列标注 INT8 量化模型 -wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/msra_ner_quant_infer_model.zip -unzip msra_ner_quant_infer_model.zip -``` -量化模型的部署命令为: -``` -# 第一步,打开 set_dynamic_shape 开关,自动配置动态shape,在当前目录下生成 dynamic_shape_info.txt 文件 -python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape -# 第二步,读取上一步中生成的 dynamic_shape_info.txt 文件,开启预测 -python infer_gpu.py --task_name token_cls --model_path ./msra_ner_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt -``` -FP16 和 INT8 推理的运行结果和FP32的运行结果一致。 - -infer_gpu.py 脚本中的参数说明: -| 参数 |参数说明 | -|----------|--------------| -|--task_name | 配置任务名称,可选 seq_cls 或 token_cls,默认为 seq_cls| -|--model_name_or_path | 模型的路径或者名字,默认为ernie-3.0-medium-zh| -|--model_path | 用于推理的 Paddle 模型的路径| -|--batch_size |最大可测的 batch size,默认为 32| -|--max_seq_length |最大序列长度,默认为 128| -|--shape_info_file | 指定 dynamic shape info 的存储文件名,默认为 shape_info.txt | -|--set_dynamic_shape | 配置是否自动配置 TensorRT 的 dynamic shape,在GPU上INT8量化推理时需要先开启此选项进行 dynamic shape 配置,生成 shape_info.txt 后再关闭,默认关闭 | -|--precision_mode | 推理精度,可选 fp32,fp16 或者 int8,默认 fp32 | - -## 3. 分类模型推理 -### 3.1 模型获取 -用户可使用自己训练的模型进行推理,具体训练调优方法可参考[模型训练调优](./../../README.md#微调),也可以使用我们提供的 tnews 数据集训练的 ERNIE 3.0 模型,请执行如下命令获取模型: -``` -# 分类模型模型: -wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_pruned_infer_model.zip -unzip tnews_pruned_infer_model.zip -``` -### 3.2 CPU端推理样例 -在 CPU 端,请使用如下命令进行部署 -``` -python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 -``` -输出打印如下: -``` -input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? -seq cls result: -label: news_car confidence: 0.5543532371520996 ------------------------------ -input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 -seq cls result: -label: news_entertainment confidence: 0.9495906829833984 ------------------------------ -``` -和序列标注模型推理类似,使用动态量化进行加速的命令如下: -``` -python infer_cpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode int8 -``` -输出打印如下: -``` -input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? -seq cls result: -label: news_car confidence: 0.5778735876083374 ------------------------------ -input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 -seq cls result: -label: news_entertainment confidence: 0.9206441044807434 ------------------------------ -``` -### 3.3 GPU端推理样例 -在 GPU 端,请使用如下命令进行部署 -``` -python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 -``` -输出打印如下: -``` -input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? -seq cls result: -label: news_car confidence: 0.5543532371520996 ------------------------------ -input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 -seq cls result: -label: news_entertainment confidence: 0.9495906829833984 ------------------------------ -``` -如果需要 FP16 进行加速,可以设置 precision_mode 为 fp16,具体命令为 -``` -python infer_gpu.py --task_name seq_cls --model_path ./tnews_pruned_infer_model/float32 --precision_mode fp16 -``` -输出打印如下: -``` -input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? -seq cls result: -label: news_car confidence: 0.5536671876907349 ------------------------------ -input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 -seq cls result: -label: news_entertainment confidence: 0.9494127035140991 ------------------------------ -``` -如果需要进行 INT8 量化加速,还需要使用量化脚本对训练好的 FP32 模型进行量化,然后使用量化后的模型进行部署,模型的量化请参考:[模型量化脚本使用说明](./../../README.md#模型压缩),也可下载我们量化后的 INT8 模型进行部署,请执行如下命令获取模型: -``` -# 获取序列标注 INT8 量化模型 -wget https://paddlenlp.bj.bcebos.com/models/transformers/ernie_3.0/tnews_quant_infer_model.zip -unzip tnews_quant_infer_model.zip -``` -量化模型的部署命令为: -``` -# 第一步,打开 set_dynamic_shape 开关,自动配置动态shape,在当前目录下生成 dynamic_shape_info.txt 文件 -python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt --set_dynamic_shape -# 第二步,读取上一步中生成的 dynamic_shape_info.txt 文件,开启预测 -python infer_gpu.py --task_name seq_cls --model_path ./tnews_quant_infer_model/int8 --shape_info_file dynamic_shape_info.txt -``` -输出打印如下: -``` -input data: 未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗? -seq cls result: -label: news_car confidence: 0.5510320067405701 ------------------------------ -input data: 黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤 -seq cls result: -label: news_entertainment confidence: 0.9432708024978638 ------------------------------ -``` diff --git a/model_zoo/text/ernie-3.0/ernie_predictor.py b/model_zoo/text/ernie-3.0/ernie_predictor.py deleted file mode 100755 index 61162de97..000000000 --- a/model_zoo/text/ernie-3.0/ernie_predictor.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import six -import os -import numpy as np -# import paddle -from psutil import cpu_count -from paddlenlp.transformers import AutoTokenizer -import fastdeploy - - -def token_cls_print_ret(infer_result, input_data): - rets = infer_result["value"] - for i, ret in enumerate(rets): - print("input data:", input_data[i]) - print("The model detects all entities:") - for iterm in ret: - print("entity:", iterm["entity"], " label:", iterm["label"], - " pos:", iterm["pos"]) - print("-----------------------------") - - -def seq_cls_print_ret(infer_result, input_data): - label_list = [ - "news_story", "news_culture", "news_entertainment", "news_sports", - "news_finance", "news_house", "news_car", "news_edu", "news_tech", - "news_military", "news_travel", "news_world", "news_stock", - "news_agriculture", "news_game" - ] - label = infer_result["label"].squeeze().tolist() - confidence = infer_result["confidence"].squeeze().tolist() - for i, ret in enumerate(infer_result): - print("input data:", input_data[i]) - print("seq cls result:") - print("label:", label_list[label[i]], " confidence:", confidence[i]) - print("-----------------------------") - - -class ErniePredictor(object): - def __init__(self, args): - if not isinstance(args.device, six.string_types): - print( - ">>> [InferBackend] The type of device must be string, but the type you set is: ", - type(device)) - exit(0) - args.device = args.device.lower() - if args.device not in ['cpu', 'gpu', 'xpu']: - print( - ">>> [InferBackend] The device must be cpu or gpu, but your device is set to:", - type(args.device)) - exit(0) - - self.task_name = args.task_name - self.tokenizer = AutoTokenizer.from_pretrained( - args.model_name_or_path, use_faster=True) - if args.task_name == 'seq_cls': - self.label_names = [] - self.preprocess = self.seq_cls_preprocess - self.postprocess = self.seq_cls_postprocess - self.printer = seq_cls_print_ret - elif args.task_name == 'token_cls': - self.label_names = [ - 'O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC' - ] - self.preprocess = self.token_cls_preprocess - self.postprocess = self.token_cls_postprocess - self.printer = token_cls_print_ret - else: - print( - "[ErniePredictor]: task_name only support seq_cls and token_cls now." - ) - exit(0) - - self.max_seq_length = args.max_seq_length - - if args.device == 'cpu': - args.set_dynamic_shape = False - args.shape_info_file = None - args.batch_size = 32 - if args.device == 'gpu': - args.num_threads = cpu_count(logical=False) - # Set the runtime option - runtime_option = fastdeploy.RuntimeOption() - runtime_option.set_model_path(args.model_path + ".pdmodel", - args.model_path + ".pdiparams") - precision_mode = args.precision_mode.lower() - use_fp16 = precision_mode == "fp16" - # runtime_option.use_paddle_backend() - if args.device == 'cpu': - runtime_option.use_cpu() - runtime_option.set_cpu_thread_num(args.num_threads) - if use_fp16: - runtime_option.enable_paddle_mkldnn() - elif args.device == 'gpu': - runtime_option.use_gpu() - if use_fp16: - runtime_option.use_trt_backend() - runtime_option.enable_trt_fp16() - - self.inference_backend = fastdeploy.Runtime(runtime_option._option) - if args.set_dynamic_shape: - # If set_dynamic_shape is turned on, all required dynamic shapes will be - # automatically set according to the batch_size and max_seq_length. - self.set_dynamic_shape(args.max_seq_length, args.batch_size) - exit(0) - - def seq_cls_preprocess(self, input_data: list): - data = input_data - # tokenizer + pad - data = self.tokenizer( - data, - max_length=self.max_seq_length, - padding=True, - truncation=True) - input_ids = data["input_ids"] - token_type_ids = data["token_type_ids"] - return { - "input_ids": np.array( - input_ids, dtype="int64"), - "token_type_ids": np.array( - token_type_ids, dtype="int64") - } - - def seq_cls_postprocess(self, infer_data, input_data): - logits = np.array(infer_data[0]) - max_value = np.max(logits, axis=1, keepdims=True) - exp_data = np.exp(logits - max_value) - probs = exp_data / np.sum(exp_data, axis=1, keepdims=True) - out_dict = { - "label": probs.argmax(axis=-1), - "confidence": probs.max(axis=-1) - } - return out_dict - - def token_cls_preprocess(self, data: list): - # tokenizer + pad - is_split_into_words = False - if isinstance(data[0], list): - is_split_into_words = True - data = self.tokenizer( - data, - max_length=self.max_seq_length, - padding=True, - truncation=True, - is_split_into_words=is_split_into_words) - - input_ids = data["input_ids"] - token_type_ids = data["token_type_ids"] - return { - "input_ids": np.array( - input_ids, dtype="int64"), - "token_type_ids": np.array( - token_type_ids, dtype="int64") - } - - def token_cls_postprocess(self, infer_data, input_data): - result = np.array(infer_data[0]) - tokens_label = result.argmax(axis=-1).tolist() - # 获取batch中每个token的实体 - value = [] - for batch, token_label in enumerate(tokens_label): - start = -1 - label_name = "" - items = [] - for i, label in enumerate(token_label): - if (self.label_names[label] == "O" or - "B-" in self.label_names[label]) and start >= 0: - entity = input_data[batch][start:i - 1] - if isinstance(entity, list): - entity = "".join(entity) - items.append({ - "pos": [start, i - 2], - "entity": entity, - "label": label_name, - }) - start = -1 - if "B-" in self.label_names[label]: - start = i - 1 - label_name = self.label_names[label][2:] - if start >= 0: - items.append({ - "pos": [start, len(token_label) - 1], - "entity": input_data[batch][start:len(token_label) - 1], - "label": "" - }) - value.append(items) - - out_dict = {"value": value, "tokens_label": tokens_label} - return out_dict - - def set_dynamic_shape(self, max_seq_length, batch_size): - # The dynamic shape info required by TRT is automatically generated - # according to max_seq_length and batch_size and stored in shape_info.txt - min_batch_size, max_batch_size, opt_batch_size = 1, batch_size, batch_size - min_seq_len, max_seq_len, opt_seq_len = 2, max_seq_length, max_seq_length - batches = [ - { - "input_ids": np.zeros( - [min_batch_size, min_seq_len], dtype="int64"), - "token_type_ids": np.zeros( - [min_batch_size, min_seq_len], dtype="int64") - }, - { - "input_ids": np.zeros( - [max_batch_size, max_seq_len], dtype="int64"), - "token_type_ids": np.zeros( - [max_batch_size, max_seq_len], dtype="int64") - }, - { - "input_ids": np.zeros( - [opt_batch_size, opt_seq_len], dtype="int64"), - "token_type_ids": np.zeros( - [opt_batch_size, opt_seq_len], dtype="int64") - }, - ] - for batch in batches: - self.inference_backend.infer(batch) - print( - "[InferBackend] Set dynamic shape finished, please close set_dynamic_shape and restart." - ) - - def infer(self, data): - return self.inference_backend.infer(data) - - def predict(self, input_data: list): - preprocess_result = self.preprocess(input_data) - infer_result = self.infer(preprocess_result) - result = self.postprocess(infer_result, input_data) - self.printer(result, input_data) - return result diff --git a/model_zoo/text/ernie-3.0/infer_cpu.py b/model_zoo/text/ernie-3.0/infer_cpu.py deleted file mode 100755 index 3ab8121a5..000000000 --- a/model_zoo/text/ernie-3.0/infer_cpu.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import argparse -from psutil import cpu_count -from ernie_predictor import ErniePredictor - - -def parse_args(): - parser = argparse.ArgumentParser() - # Required parameters - parser.add_argument( - "--task_name", - default='seq_cls', - type=str, - help="The name of the task to perform predict, selected in: seq_cls and token_cls" - ) - parser.add_argument( - "--model_name_or_path", - default="ernie-3.0-medium-zh", - type=str, - help="The directory or name of model.", ) - parser.add_argument( - "--model_path", - type=str, - required=True, - help="The path prefix of inference model to be used.", ) - parser.add_argument( - "--max_seq_length", - default=128, - type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", ) - parser.add_argument( - "--precision_mode", - type=str, - default="fp32", - choices=["fp32", "int8"], - help="Inference precision, set int8 to use dynamic quantization for acceleration.", - ) - parser.add_argument( - "--num_threads", - default=cpu_count(logical=False), - type=int, - help="num_threads for cpu.", ) - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - - args.task_name = args.task_name.lower() - args.device = 'cpu' - predictor = ErniePredictor(args) - - if args.task_name == 'seq_cls': - text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?", "黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤"] - elif args.task_name == 'token_cls': - text = ["北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"] - - outputs = predictor.predict(text) - - -if __name__ == "__main__": - main() diff --git a/model_zoo/text/ernie-3.0/infer_gpu.py b/model_zoo/text/ernie-3.0/infer_gpu.py deleted file mode 100755 index 4175a2929..000000000 --- a/model_zoo/text/ernie-3.0/infer_gpu.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle -import argparse -from ernie_predictor import ErniePredictor - - -def parse_args(): - parser = argparse.ArgumentParser() - # Required parameters - parser.add_argument( - "--task_name", - default='seq_cls', - type=str, - help="The name of the task to perform predict, selected in: seq_cls and token_cls" - ) - parser.add_argument( - "--model_name_or_path", - default="ernie-3.0-medium-zh", - type=str, - help="The directory or name of model.", ) - parser.add_argument( - "--model_path", - type=str, - required=True, - help="The path prefix of inference model to be used.", ) - parser.add_argument( - "--batch_size", - default=32, - type=int, - help="Batch size for predict.", ) - parser.add_argument( - "--max_seq_length", - default=128, - type=int, - help="The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded.", ) - parser.add_argument( - "--set_dynamic_shape", - action='store_true', - help="Whether to automatically set dynamic shape.", ) - parser.add_argument( - "--shape_info_file", - default="shape_info.txt", - type=str, - help="The collected dynamic shape info file.", ) - parser.add_argument( - "--precision_mode", - type=str, - default="fp32", - choices=["fp32", "fp16", "int8"], - help="Inference precision.", ) - args = parser.parse_args() - return args - - -def main(): - args = parse_args() - args.task_name = args.task_name.lower() - args.device = 'gpu' - predictor = ErniePredictor(args) - - if args.task_name == 'seq_cls': - text = ["未来自动驾驶真的会让酒驾和疲劳驾驶成历史吗?", "黄磊接受华少快问快答,不光智商逆天,情商也不逊黄渤"] - elif args.task_name == 'token_cls': - text = ["北京的涮肉,重庆的火锅,成都的小吃都是极具特色的美食。", "乔丹、科比、詹姆斯和姚明都是篮球界的标志性人物。"] - - outputs = predictor.predict(text) - - -if __name__ == "__main__": - main() diff --git a/model_zoo/text/ernie-3.0/requirements_cpu.txt b/model_zoo/text/ernie-3.0/requirements_cpu.txt deleted file mode 100755 index 9725b9194..000000000 --- a/model_zoo/text/ernie-3.0/requirements_cpu.txt +++ /dev/null @@ -1,3 +0,0 @@ -onnxruntime -psutil -paddlenlp diff --git a/model_zoo/text/ernie-3.0/requirements_gpu.txt b/model_zoo/text/ernie-3.0/requirements_gpu.txt deleted file mode 100755 index bd5e113bf..000000000 --- a/model_zoo/text/ernie-3.0/requirements_gpu.txt +++ /dev/null @@ -1,4 +0,0 @@ -onnxruntime-gpu -onnxconverter-common -psutil -paddlenlp diff --git a/model_zoo/vision/arcface/README.md b/model_zoo/vision/arcface/README.md deleted file mode 100644 index 478b695c1..000000000 --- a/model_zoo/vision/arcface/README.md +++ /dev/null @@ -1,80 +0,0 @@ -# ArcFace部署示例 - -## 0. 简介 -当前支持模型版本为:[ArcFace CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5) - -本文档说明如何进行[ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch) 的快速部署推理。本目录结构如下 - -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── arcface.cc # C++ 示例代码 -├── api.md # API 说明文档 -├── README.md # ArcFace 部署文档 -└── arcface.py # Python示例代码 -``` - -## 1. 特别说明 -fastdeploy支持 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 的人脸识别模块recognition中大部分模型的部署,包括ArcFace、CosFace、Partial FC、VPL等,由于用法类似,这里仅用ArcFace来演示部署流程。所有支持的模型结构,可参考 [ArcFace API文档](./api.md). - - -## 2. 获取ONNX文件 - -访问[ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch)官方github库,按照指引下载安装,下载pt模型文件,利用 `torch2onnx.py` 得到`onnx`格式文件。 - -* 下载ArcFace模型文件 - ``` - Link: https://pan.baidu.com/share/init?surl=CL-l4zWqsI1oDuEEYVhj-g code: e8pw - ``` - -* 导出onnx格式文件 - ```bash - PYTHONPATH=. python ./torch2onnx.py ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1 - ``` -* 移动onnx文件到model_zoo/arcface的目录 - ```bash - cp PATH/TO/ms1mv3_arcface_r100.onnx PATH/TO/model_zoo/vision/arcface/ - ``` - - -## 3. 准备测试图片 -准备3张仅包含人脸的测试图片,命名为face_recognition_*.jpg,并拷贝到可执行文件所在的目录,比如 -```bash -face_recognition_0.png # 0,1 同一个人 -face_recognition_1.png -face_recognition_2.png # 0,2 不同的人 -``` - -## 4. 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -```bash -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## 5. Python部署 - -执行如下代码即会自动下载ArcFace模型和测试图片 -```bash -python arcface.py -``` - -执行完成后会输出检测结果如下 -``` -FaceRecognitionResult: [Dim(512), Min(-0.141219), Max(0.121645), Mean(-0.003172)] -FaceRecognitionResult: [Dim(512), Min(-0.117939), Max(0.141897), Mean(0.000407)] -FaceRecognitionResult: [Dim(512), Min(-0.124471), Max(0.112567), Mean(-0.001320)] -Cosine 01: 0.7211584683376316 -Cosine 02: -0.06262668682788906 -``` - -## 6. 其它文档 - -- [C++部署](./cpp/README.md) -- [ArcFace API文档](./api.md) diff --git a/model_zoo/vision/arcface/api.md b/model_zoo/vision/arcface/api.md deleted file mode 100644 index b509d5cb2..000000000 --- a/model_zoo/vision/arcface/api.md +++ /dev/null @@ -1,113 +0,0 @@ -# ArcFace API说明 - -## 0. 特别说明 -fastdeploy支持 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 的人脸识别模块recognition中大部分模型的部署,包括ArcFace、CosFace、Partial FC、VPL等,由于用法类似,这里仅用ArcFace来说明参数设置。 - -## 1. Python API - -### 1.1 ArcFace 类 - -#### 1.1.1 类初始化说明 -```python -fastdeploy.vision.deepinsight.ArcFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -ArcFace模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`xxx.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### 1.1.2 predict函数 -> ```python -> ArcFace.predict(image_data) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -示例代码参考[arcface.py](./arcface.py) - -### 1.2 其他支持的类 -```python -fastdeploy.vision.deepinsight.ArcFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -fastdeploy.vision.deepinsight.CosFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -fastdeploy.vision.deepinsight.PartialFC(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -fastdeploy.vision.deepinsight.VPL(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -fastdeploy.vision.deepinsight.InsightFaceRecognitionModel(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -Tips: 如果 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 人脸识别的推理逻辑没有随它自身的版本发生太大变化,则可以都统一使用 InsightFaceRecognitionModel 进行推理。 - - - -## 2. C++ API - -### 2.1 ArcFace 类 -#### 2.1.1 类初始化说明 -```C++ -fastdeploy::vision::deepinsight::ArcFace( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -ArcFace模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`xxx.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### 2.1.2 Predict函数 -> ```C++ -> ArcFace::Predict(cv::Mat* im, FaceRecognitionResult* result) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,result的成员embedding包含人脸向量 - -示例代码参考[cpp/arcface.cc](cpp/arcface.cc) - -### 2.2 其他支持的类 -```C++ -fastdeploy::vision::deepinsight::ArcFace( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); -fastdeploy::vision::deepinsight::CosFace( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); -fastdeploy::vision::deepinsight::PartialFC( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); -fastdeploy::vision::deepinsight::VPL( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); -fastdeploy::vision::deepinsight::InsightFaceRecognitionModel( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX); -``` -Tips: 如果 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 人脸识别的推理逻辑没有随它自身的版本发生太大变化,则可以都统一使用 InsightFaceRecognitionModel 进行推理。 - - -## 3. 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/arcface/arcface.py b/model_zoo/vision/arcface/arcface.py deleted file mode 100644 index 8c5fdfdf1..000000000 --- a/model_zoo/vision/arcface/arcface.py +++ /dev/null @@ -1,46 +0,0 @@ -import fastdeploy as fd -import numpy as np -import cv2 - - -# 余弦相似度 -def cosine_similarity(a, b): - a = np.array(a) - b = np.array(b) - mul_a = np.linalg.norm(a, ord=2) - mul_b = np.linalg.norm(b, ord=2) - mul_ab = np.dot(a, b) - return mul_ab / (np.sqrt(mul_a) * np.sqrt(mul_b)) - - -# 加载模型 -model = fd.vision.deepinsight.ArcFace("ms1mv3_arcface_r100.onnx") -print("Initialed model!") - -# 加载图片 -face0 = cv2.imread("face_recognition_0.png") # 0,1 同一个人 -face1 = cv2.imread("face_recognition_1.png") -face2 = cv2.imread("face_recognition_2.png") # 0,2 不同的人 - -# 设置 l2 normalize -model.l2_normalize = True - -result0 = model.predict(face0) -result1 = model.predict(face1) -result2 = model.predict(face2) - -# 计算余弦相似度 -embedding0 = result0.embedding -embedding1 = result1.embedding -embedding2 = result2.embedding - -cosine01 = cosine_similarity(embedding0, embedding1) -cosine02 = cosine_similarity(embedding0, embedding2) - -# 打印结果 -print(result0, end="") -print(result1, end="") -print(result2, end="") -print("Cosine 01: ", cosine01) -print("Cosine 02: ", cosine02) -print(model.runtime_option) diff --git a/model_zoo/vision/arcface/cpp/CMakeLists.txt b/model_zoo/vision/arcface/cpp/CMakeLists.txt deleted file mode 100644 index 387324f16..000000000 --- a/model_zoo/vision/arcface/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(arcface_demo C CXX) -CMAKE_MINIMUM_REQUIRED(VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(arcface_demo ${PROJECT_SOURCE_DIR}/arcface.cc) -# 添加FastDeploy库依赖 -target_link_libraries(arcface_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/arcface/cpp/README.md b/model_zoo/vision/arcface/cpp/README.md deleted file mode 100644 index bb7145d32..000000000 --- a/model_zoo/vision/arcface/cpp/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# 编译ArcFace示例 - -## 0. 简介 -当前支持模型版本为:[ArcFace CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5) - -## 1. 下载和解压预测库 -```bash -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.3.0.tgz -tar xvf fastdeploy-linux-x64-0.3.0.tgz -``` - -## 1. 编译示例代码 -```bash -mkdir build & cd build -cmake .. -make -j -``` - -## 3. 特别说明 -fastdeploy支持 [insightface](https://github.com/deepinsight/insightface/tree/master/recognition) 的人脸识别模块recognition中大部分模型的部署,包括ArcFace、CosFace、Partial FC、VPL等,由于用法类似,这里仅用ArcFace来演示部署流程。所有支持的模型结构,可参考 [ArcFace API文档](../api.md). - -## 4. 获取ONNX文件 - -访问[ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch)官方github库,按照指引下载安装,下载pt模型文件,利用 `torch2onnx.py` 得到`onnx`格式文件。 - -* 下载ArcFace模型文件 - ``` - Link: https://pan.baidu.com/share/init?surl=CL-l4zWqsI1oDuEEYVhj-g code: e8pw - ``` - -* 导出onnx格式文件 - ```bash - PYTHONPATH=. python ./torch2onnx.py ms1mv3_arcface_r100_fp16/backbone.pth --output ms1mv3_arcface_r100.onnx --network r100 --simplify 1 - ``` -* 移动onnx文件到model_zoo/arcface的目录 - ```bash - cp PATH/TO/ms1mv3_arcface_r100.onnx PATH/TO/model_zoo/vision/arcface/ - ``` - - -## 5. 准备测试图片 -准备3张仅包含人脸的测试图片,命名为face_recognition_*.jpg,并拷贝到可执行文件所在的目录,比如 -```bash -face_recognition_0.png # 0,1 同一个人 -face_recognition_1.png -face_recognition_2.png # 0,2 不同的人 -``` - -## 6. 执行 -```bash -./arcface_demo -``` - -执行完成后会输出检测结果如下 -``` -FaceRecognitionResult: [Dim(512), Min(-0.141219), Max(0.121645), Mean(-0.003172)] -FaceRecognitionResult: [Dim(512), Min(-0.117939), Max(0.141897), Mean(0.000407)] -FaceRecognitionResult: [Dim(512), Min(-0.124471), Max(0.112567), Mean(-0.001320)] -Cosine 01: 0.7211584683376316 -Cosine 02: -0.06262668682788906 -``` diff --git a/model_zoo/vision/arcface/cpp/arcface.cc b/model_zoo/vision/arcface/cpp/arcface.cc deleted file mode 100644 index 723bf9398..000000000 --- a/model_zoo/vision/arcface/cpp/arcface.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" -#include "fastdeploy/vision/utils/utils.h" - -int main() { - namespace vis = fastdeploy::vision; - // 0,1 同一个人, 0,2 不同的人 - std::string model_file = "./ms1mv3_arcface_r100.onnx"; - std::string face0_path = "./face_recognition_0.png"; - std::string face1_path = "./face_recognition_1.png"; - std::string face2_path = "./face_recognition_2.png"; - - auto model = vis::deepinsight::ArcFace(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - // 设置输出l2 normalize后的embedding - model.l2_normalize = true; - - cv::Mat face0 = cv::imread(face0_path); - cv::Mat face1 = cv::imread(face1_path); - cv::Mat face2 = cv::imread(face2_path); - - vis::FaceRecognitionResult res0; - vis::FaceRecognitionResult res1; - vis::FaceRecognitionResult res2; - if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) || - (!model.Predict(&face2, &res2))) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测框结果 - std::cout << "--- [Face 0]:" << res0.Str(); - std::cout << "--- [Face 1]:" << res1.Str(); - std::cout << "--- [Face 2]:" << res2.Str(); - - // 计算余弦相似度 - float cosine01 = vis::utils::CosineSimilarity(res0.embedding, res1.embedding, - model.l2_normalize); - float cosine02 = vis::utils::CosineSimilarity(res0.embedding, res2.embedding, - model.l2_normalize); - std::cout << "Detect Done! Cosine 01: " << cosine01 - << ", Cosine 02:" << cosine02 << std::endl; - return 0; -} diff --git a/model_zoo/vision/modnet/README.md b/model_zoo/vision/modnet/README.md deleted file mode 100644 index 90b3fafdc..000000000 --- a/model_zoo/vision/modnet/README.md +++ /dev/null @@ -1,67 +0,0 @@ -# MODNet 部署示例 - -## 0. 简介 -当前支持模型版本为:[MODNet CommitID:28165a4](https://github.com/ZHKKKe/MODNet/commit/28165a4) - -本文档说明如何进行[MODNet](https://github.com/ZHKKKe/MODNet) 的快速部署推理。本目录结构如下 - -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── modnet.cc # C++ 示例代码 -├── api.md # API 说明文档 -├── README.md # MODNet 部署文档 -└── modnet.py # Python示例代码 -``` - -## 1. 获取ONNX文件 - -访问[MODNet](https://github.com/ZHKKKe/MODNet)官方github库,按照指引下载安装,下载模型文件,利用 `onnx/export_onnx.py` 得到`onnx`格式文件。 - -* 导出onnx格式文件 - ```bash - python -m onnx.export_onnx \ - --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \ - --output-path=pretrained/modnet_photographic_portrait_matting.onnx - ``` -* 移动onnx文件到model_zoo/modnet的目录 - ```bash - cp PATH/TO/modnet_photographic_portrait_matting.onnx PATH/TO/model_zoo/vision/modnet/ - ``` - - -## 2. 准备测试图片 -准备1张仅包含人像的测试图片,命名为matting_1.jpg,并拷贝到可执行文件所在的目录,比如 -```bash -matting_1.jpg -``` - -## 3. 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -```bash -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## 4. Python部署 - -执行如下代码即会自动下载MODNet模型和测试图片 -```bash -python modnet.py -``` - -执行完成后会输出检测结果如下, 可视化结果保存在`vis_result.jpg`中 -``` -MattingResult[Foreground(false), Alpha(Numel(65536), Shape(256,256), Min(0.000000), Max(1.000000), Mean(0.464415))] -``` - -## 5. 其它文档 - -- [C++部署](./cpp/README.md) -- [MODNet API文档](./api.md) diff --git a/model_zoo/vision/modnet/api.md b/model_zoo/vision/modnet/api.md deleted file mode 100644 index 164ef099e..000000000 --- a/model_zoo/vision/modnet/api.md +++ /dev/null @@ -1,72 +0,0 @@ -# MODNet API说明 - -## 1. Python API - -### 1.1 MODNet 类 - -#### 1.1.1 类初始化说明 -```python -fastdeploy.vision.zhkkke.MODNet(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -MODNet模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`xxx.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### 1.1.2 predict函数 -> ```python -> MODNet.predict(image_data) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -示例代码参考[modnet.py](./modnet.py) - - -## 2. C++ API - -### 2.1 MODNet 类 -#### 2.1.1 类初始化说明 -```C++ -fastdeploy::vision::zhkkke::MODNet( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -MODNet模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`xxx.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### 2.1.2 Predict函数 -> ```C++ -> MODNet::Predict(cv::Mat* im, MattingResult* result) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包含的成员如下 -> > * alpha: std::vector\ 包含透明度 -> > * contain_foreground: bool 表示输出是否包含预测的前景 -> > * foreground: std::vector\ 如果模型包含前景预测,则此项为预测的前景 -> > * shape: std::vector\ 包含输出alpha的维度(h,w), 如果包含前景,则shape为(h,w,c) c表示前景的通道数,一般为c=3 - -示例代码参考[cpp/modnet.cc](cpp/modnet.cc) - -## 3. 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/modnet/cpp/CMakeLists.txt b/model_zoo/vision/modnet/cpp/CMakeLists.txt deleted file mode 100644 index 9e500debe..000000000 --- a/model_zoo/vision/modnet/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(modnet_demo C CXX) -CMAKE_MINIMUM_REQUIRED(VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(modnet_demo ${PROJECT_SOURCE_DIR}/modnet.cc) -# 添加FastDeploy库依赖 -target_link_libraries(modnet_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/modnet/cpp/README.md b/model_zoo/vision/modnet/cpp/README.md deleted file mode 100644 index f2b2e4949..000000000 --- a/model_zoo/vision/modnet/cpp/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# 编译ArcFace示例 - -## 0. 简介 -当前支持模型版本为:[MODNet CommitID:28165a4](https://github.com/ZHKKKe/MODNet/commit/28165a4) - -## 1. 下载和解压预测库 -```bash -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.3.0.tgz -tar xvf fastdeploy-linux-x64-0.3.0.tgz -``` - -## 2. 编译示例代码 -```bash -mkdir build & cd build -cmake .. -make -j -``` - -## 3. 获取ONNX文件 - -访问[MODNet](https://github.com/ZHKKKe/MODNet)官方github库,按照指引下载安装,下载模型文件,利用 `onnx/export_onnx.py` 得到`onnx`格式文件。 - -* 导出onnx格式文件 - ```bash - python -m onnx.export_onnx \ - --ckpt-path=pretrained/modnet_photographic_portrait_matting.ckpt \ - --output-path=pretrained/modnet_photographic_portrait_matting.onnx - ``` -* 移动onnx文件到model_zoo/modnet的目录 - ```bash - cp PATH/TO/modnet_photographic_portrait_matting.onnx PATH/TO/model_zoo/vision/modnet/ - ``` - - -## 4. 准备测试图片 -准备1张仅包含人像的测试图片,命名为matting_1.jpg,并拷贝到可执行文件所在的目录,比如 -```bash -matting_1.jpg -``` - -## 5. 执行 -```bash -./modnet_demo -``` - -执行完成后会输出检测结果如下, 可视化结果保存在`vis_result.jpg`中 -``` -MattingResult[Foreground(false), Alpha(Numel(65536), Shape(256,256), Min(0.000000), Max(1.000000), Mean(0.464415))] -``` diff --git a/model_zoo/vision/modnet/cpp/modnet.cc b/model_zoo/vision/modnet/cpp/modnet.cc deleted file mode 100644 index b89b2d1dc..000000000 --- a/model_zoo/vision/modnet/cpp/modnet.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - std::string model_file = "./modnet_photographic_portrait_matting.onnx"; - std::string img_path = "./matting_1.jpg"; - std::string vis_path = "./vis_result.jpg"; - - auto model = vis::zhkkke::MODNet(model_file); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - // 设置推理size, 必须和模型文件一致 - model.size = {256, 256}; - - cv::Mat im = cv::imread(img_path); - cv::Mat im_old = im.clone(); - cv::Mat vis_im = im.clone(); - - vis::MattingResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - std::cout << "Prediction Done!" << std::endl; - - // 输出预测结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - bool remove_small_connected_area = true; - vis::Visualize::VisMattingAlpha(im_old, res, &vis_im, - remove_small_connected_area); - cv::imwrite(vis_path, vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/model_zoo/vision/modnet/modnet.py b/model_zoo/vision/modnet/modnet.py deleted file mode 100644 index 538a667bd..000000000 --- a/model_zoo/vision/modnet/modnet.py +++ /dev/null @@ -1,22 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 加载模型 -model = fd.vision.zhkkke.MODNet("modnet_photographic_portrait_matting.onnx") - -# 设置模型输入大小 -model.size = (256, 256) - -# 预测图片 -im = cv2.imread("./matting_1.jpg") -im_old = im.copy() -vis_im = im.copy() - -result = model.predict(im) -# 可视化结果 -fd.vision.visualize.vis_matting_alpha(im_old, result, vis_im, False) -cv2.imwrite("vis_result.jpg", vis_im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/nanodet_plus/README.md b/model_zoo/vision/nanodet_plus/README.md deleted file mode 100644 index 164f7691f..000000000 --- a/model_zoo/vision/nanodet_plus/README.md +++ /dev/null @@ -1,46 +0,0 @@ -# NanoDetPlus部署示例 - -当前支持模型版本为:[NanoDetPlus v1.0.0-alpha-1](https://github.com/RangiLyu/nanodet/releases/tag/v1.0.0-alpha-1) - -本文档说明如何进行[NanoDetPlus](https://github.com/RangiLyu/nanodet)的快速部署推理。本目录结构如下 -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── nanodet_plus.cc # C++ 示例代码 -├── README.md # YOLOX 部署文档 -└── nanodet_plus.py # Python示例代码 -``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载NanoDetPlus模型和测试图片 -``` -python nanodet_plus.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -5.710144,220.634033, 807.854370, 724.089111, 0.825635, 5 -45.646439,393.694061, 229.267044, 903.998413, 0.818263, 0 -218.289322,402.268829, 342.083252, 861.766479, 0.709301, 0 -698.587036,325.627197, 809.000000, 876.990967, 0.630235, 0 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [NanoDetPlus API文档](./api.md) diff --git a/model_zoo/vision/nanodet_plus/api.md b/model_zoo/vision/nanodet_plus/api.md deleted file mode 100644 index b428e39df..000000000 --- a/model_zoo/vision/nanodet_plus/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# NanoDetPlus API说明 - -## Python API - -### NanoDetPlus类 -``` -fastdeploy.vision.rangilyu.NanoDetPlus(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -NanoDetPlus模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`nanodet-plus-m_320.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> NanoDetPlus.predict(image_data, conf_threshold=0.35, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[nanodet_plus.py](./nanodet_plus.py) - - -## C++ API - -### NanoDetPlus类 -``` -fastdeploy::vision::rangilyu::NanoDetPlus( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -NanoDetPlus模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`nanodet-plus-m_320.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> NanoDetPlus::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.35, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/nanodet_plus.cc](cpp/nanodet_plus.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt b/model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt deleted file mode 100644 index 7a78ef9e4..000000000 --- a/model_zoo/vision/nanodet_plus/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(nanodet_plus_demo C CXX) -CMAKE_MINIMUM_REQUIRED(VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(nanodet_plus_demo ${PROJECT_SOURCE_DIR}/nanodet_plus.cc) -# 添加FastDeploy库依赖 -target_link_libraries(nanodet_plus_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/nanodet_plus/cpp/README.md b/model_zoo/vision/nanodet_plus/cpp/README.md deleted file mode 100644 index 03dc65a0a..000000000 --- a/model_zoo/vision/nanodet_plus/cpp/README.md +++ /dev/null @@ -1,30 +0,0 @@ -# 编译NanoDetPlus示例 - -当前支持模型版本为:[NanoDetPlus v1.0.0-alpha-1](https://github.com/RangiLyu/nanodet/releases/tag/v1.0.0-alpha-1) - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 下载模型和图片 -wget https://github.com/RangiLyu/nanodet/releases/download/v1.0.0-alpha-1/nanodet-plus-m_320.onnx -wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg - -# 执行 -./nanodet_plus_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -5.710144,220.634033, 807.854370, 724.089111, 0.825635, 5 -45.646439,393.694061, 229.267044, 903.998413, 0.818263, 0 -218.289322,402.268829, 342.083252, 861.766479, 0.709301, 0 -698.587036,325.627197, 809.000000, 876.990967, 0.630235, 0 -``` diff --git a/model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc b/model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc deleted file mode 100644 index b252bf6f8..000000000 --- a/model_zoo/vision/nanodet_plus/cpp/nanodet_plus.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::rangilyu::NanoDetPlus("nanodet-plus-m_320.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("bus.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/nanodet_plus/nanodet_plus.py b/model_zoo/vision/nanodet_plus/nanodet_plus.py deleted file mode 100644 index 4101d2040..000000000 --- a/model_zoo/vision/nanodet_plus/nanodet_plus.py +++ /dev/null @@ -1,23 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -model_url = "https://github.com/RangiLyu/nanodet/releases/download/v1.0.0-alpha-1/nanodet-plus-m_320.onnx" -test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg" -fd.download(model_url, ".", show_progress=True) -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.rangilyu.NanoDetPlus("nanodet-plus-m_320.onnx") - -# 预测图片 -im = cv2.imread("bus.jpg") -result = model.predict(im, conf_threshold=0.35, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/ppseg/ppseg_unet.py b/model_zoo/vision/ppseg/ppseg_unet.py deleted file mode 100644 index 750e2167b..000000000 --- a/model_zoo/vision/ppseg/ppseg_unet.py +++ /dev/null @@ -1,26 +0,0 @@ -import fastdeploy as fd -import cv2 -import tarfile - -# 下载模型和测试图片 -model_url = "https://github.com/felixhjh/Fastdeploy-Models/raw/main/unet_Cityscapes.tar.gz" -test_jpg_url = "https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png" -fd.download_and_decompress(model_url, ".") -fd.download(test_jpg_url, ".", show_progress=True) -# 加载模型 -model = fd.vision.ppseg.Model("./unet_Cityscapes/model.pdmodel", - "./unet_Cityscapes/model.pdiparams", - "./unet_Cityscapes/deploy.yaml") - -# 预测图片 -im = cv2.imread("./cityscapes_demo.png") -result = model.predict(im) - -vis_im = im.copy() -# 可视化结果 -fd.vision.visualize.vis_segmentation(im, result, vis_im) -cv2.imwrite("vis_img.png", vis_im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/retinaface/README.md b/model_zoo/vision/retinaface/README.md deleted file mode 100644 index 2b1902740..000000000 --- a/model_zoo/vision/retinaface/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# RetinaFace部署示例 - -当前支持模型版本为:[RetinaFace CommitID:b984b4b](https://github.com/biubug6/Pytorch_Retinaface/commit/b984b4b) - -本文档说明如何进行[RetinaFace](https://github.com/biubug6/Pytorch_Retinaface)的快速部署推理。本目录结构如下 - -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── retinaface.cc # C++ 示例代码 -├── api.md # API 说明文档 -├── README.md # RetinaFace 部署文档 -└── retinaface.py # Python示例代码 -``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -```bash -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载RetinaFace模型和测试图片 -```bash -python retinaface.py -``` - -## 手动获取ONNX模型文件 -自动下载的模型文件是我们事先转换好的,如果您需要从RetinaFace官方repo导出ONNX,请参考以下步骤。 - -* 下载官方仓库并 -```bash -git clone https://github.com/biubug6/Pytorch_Retinaface.git -``` -* 下载预训练权重并放在weights文件夹 -```text -./weights/ - mobilenet0.25_Final.pth - mobilenetV1X0.25_pretrain.tar - Resnet50_Final.pth -``` -* 运行convert_to_onnx.py导出ONNX模型文件 -```bash -PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/mobilenet0.25_Final.pth --network mobile0.25 --long_side 640 --cpu -PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/Resnet50_Final.pth --network resnet50 --long_side 640 --cpu -``` -注意:需要先对convert_to_onnx.py脚本中的--long_side参数增加类型约束,type=int. -* 使用onnxsim对模型进行简化 -```bash -onnxsim FaceDetector.onnx Pytorch_RetinaFace_mobile0.25-640-640.onnx # mobilenet -onnxsim FaceDetector.onnx Pytorch_RetinaFace_resnet50-640-640.onnx # resnet50 -``` - - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5] -403.339783,254.192413, 490.002747, 351.931213, 0.999427, (425.657257,293.820740), (467.249451,293.667267), (446.830078,315.016388), (428.903381,326.129425), (465.764648,325.837341) -296.834564,181.992035, 384.516876, 277.461243, 0.999194, (313.605164,224.800110), (352.888977,219.088043), (333.530182,239.872787), (325.395203,255.463852), (358.417175,250.529892) -742.206238,263.547424, 840.871765, 366.171387, 0.999068, (762.715759,308.939880), (809.019653,304.544830), (786.174194,329.286163), (771.952271,341.376038), (812.717529,337.528839) -545.351685,228.015930, 635.423584, 335.458649, 0.998681, (559.295654,269.971619), (598.439758,273.823608), (567.496643,292.894348), (558.160034,306.637238), (592.175781,309.493591) -180.078125,241.787888, 257.213135, 320.321777, 0.998342, (203.702591,272.032715), (237.497726,271.356445), (222.380402,288.225708), (208.015259,301.360352), (233.943451,300.801636) -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [RetinaFace API文档](./api.md) diff --git a/model_zoo/vision/retinaface/api.md b/model_zoo/vision/retinaface/api.md deleted file mode 100644 index 47afddc87..000000000 --- a/model_zoo/vision/retinaface/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# RetinaFace API说明 - -## Python API - -### RetinaFace类 -``` -fastdeploy.vision.biubug6.RetinaFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -RetinaFace模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`Pytorch_RetinaFace_mobile0.25-640-640.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> RetinaFace.predict(image_data, conf_threshold=0.7, nms_iou_threshold=0.3) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[retinaface.py](./retinaface.py) - - -## C++ API - -### RetinaFace 类 -``` -fastdeploy::vision::biubug6::RetinaFace( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -RetinaFace模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`Pytorch_RetinaFace_mobile0.25-640-640.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> RetinaFace::Predict(cv::Mat* im, FaceDetectionResult* result, -> float conf_threshold = 0.7, -> float nms_iou_threshold = 0.3) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/retinaface.cc](cpp/retinaface.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/retinaface/cpp/CMakeLists.txt b/model_zoo/vision/retinaface/cpp/CMakeLists.txt deleted file mode 100644 index 7ca567b82..000000000 --- a/model_zoo/vision/retinaface/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(retinaface_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(retinaface_demo ${PROJECT_SOURCE_DIR}/retinaface.cc) -# 添加FastDeploy库依赖 -target_link_libraries(retinaface_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/retinaface/cpp/README.md b/model_zoo/vision/retinaface/cpp/README.md deleted file mode 100644 index ba400b570..000000000 --- a/model_zoo/vision/retinaface/cpp/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# 编译RetinaFace示例 - -当前支持模型版本为:[RetinaFace CommitID:b984b4b](https://github.com/biubug6/Pytorch_Retinaface/commit/b984b4b) - -## 下载和解压预测库 -```bash -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz -``` - -## 编译示例代码 -```bash -mkdir build & cd build -cmake .. -make -j -``` - -## 下载模型和图片 -wget https://github.com/DefTruth/Pytorch_Retinaface/releases/download/v0.1/Pytorch_RetinaFace_mobile0.25-640-640.onnx -wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg - -## 手动获取ONNX模型文件 -自动下载的模型文件是我们事先转换好的,如果您需要从RetinaFace官方repo导出ONNX,请参考以下步骤。 - -* 下载官方仓库并 -```bash -git clone https://github.com/biubug6/Pytorch_Retinaface.git -``` -* 下载预训练权重并放在weights文件夹 -```text -./weights/ - mobilenet0.25_Final.pth - mobilenetV1X0.25_pretrain.tar - Resnet50_Final.pth -``` -* 运行convert_to_onnx.py导出ONNX模型文件 -```bash -PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/mobilenet0.25_Final.pth --network mobile0.25 --long_side 640 --cpu -PYTHONPATH=. python convert_to_onnx.py --trained_model ./weights/Resnet50_Final.pth --network resnet50 --long_side 640 --cpu -``` -注意:需要先对convert_to_onnx.py脚本中的--long_side参数增加类型约束,type=int. -* 使用onnxsim对模型进行简化 -```bash -onnxsim FaceDetector.onnx Pytorch_RetinaFace_mobile0.25-640-640.onnx # mobilenet -onnxsim FaceDetector.onnx Pytorch_RetinaFace_resnet50-640-640.onnx # resnet50 -``` - -## 执行 -```bash -./retinaface_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5] -403.339783,254.192413, 490.002747, 351.931213, 0.999427, (425.657257,293.820740), (467.249451,293.667267), (446.830078,315.016388), (428.903381,326.129425), (465.764648,325.837341) -296.834564,181.992035, 384.516876, 277.461243, 0.999194, (313.605164,224.800110), (352.888977,219.088043), (333.530182,239.872787), (325.395203,255.463852), (358.417175,250.529892) -742.206238,263.547424, 840.871765, 366.171387, 0.999068, (762.715759,308.939880), (809.019653,304.544830), (786.174194,329.286163), (771.952271,341.376038), (812.717529,337.528839) -545.351685,228.015930, 635.423584, 335.458649, 0.998681, (559.295654,269.971619), (598.439758,273.823608), (567.496643,292.894348), (558.160034,306.637238), (592.175781,309.493591) -180.078125,241.787888, 257.213135, 320.321777, 0.998342, (203.702591,272.032715), (237.497726,271.356445), (222.380402,288.225708), (208.015259,301.360352), (233.943451,300.801636) -``` diff --git a/model_zoo/vision/retinaface/cpp/retinaface.cc b/model_zoo/vision/retinaface/cpp/retinaface.cc deleted file mode 100644 index 933b629c4..000000000 --- a/model_zoo/vision/retinaface/cpp/retinaface.cc +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - auto model = - vis::biubug6::RetinaFace("Pytorch_RetinaFace_mobile0.25-640-640.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread("3.jpg"); - cv::Mat vis_im = im.clone(); - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.7f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/model_zoo/vision/retinaface/retinaface.py b/model_zoo/vision/retinaface/retinaface.py deleted file mode 100644 index 0c5bd4e33..000000000 --- a/model_zoo/vision/retinaface/retinaface.py +++ /dev/null @@ -1,24 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型 -model_url = "https://github.com/DefTruth/Pytorch_Retinaface/releases/download/v0.1/Pytorch_RetinaFace_mobile0.25-640-640.onnx" -test_img_url = "https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg" -fd.download(model_url, ".", show_progress=True) -fd.download(test_img_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.biubug6.RetinaFace( - "Pytorch_RetinaFace_mobile0.25-640-640.onnx") - -# 预测图片 -im = cv2.imread("3.jpg") -result = model.predict(im, conf_threshold=0.7, nms_iou_threshold=0.3) - -# 可视化结果 -vis_im = fd.vision.visualize.vis_face_detection(im, result) -cv2.imwrite("vis_result.jpg", vis_im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/scaledyolov4/README.md b/model_zoo/vision/scaledyolov4/README.md deleted file mode 100644 index 93d3bd6c1..000000000 --- a/model_zoo/vision/scaledyolov4/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# 编译ScaledYOLOv4示例 - -当前支持模型版本为:[ScaledYOLOv4 branch yolov4-large](https://github.com/WongKinYiu/ScaledYOLOv4) - -本文档说明如何进行[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)的快速部署推理。本目录结构如下 - -``` -. -├── cpp -│   ├── CMakeLists.txt -│   ├── README.md -│   └── scaledyolov4.cc -├── README.md -└── scaled_yolov4.py -``` - -## 获取ONNX文件 - -- 手动获取 - - 访问[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)官方github库,按照指引下载安装,下载`scaledyolov4.pt` 模型,利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现问题,可以参考[ScaledYOLOv4#401](https://github.com/WongKinYiu/ScaledYOLOv4/issues/401)的解决办法 - - ``` - #下载ScaledYOLOv4模型文件 - Download from the goole drive https://drive.google.com/file/d/1aXZZE999sHMP1gev60XhNChtHPRMH3Fz/view?usp=sharing - - # 导出onnx格式文件 - python models/export.py --weights PATH/TO/scaledyolov4-xx.pt --img-size 640 - - # 移动onnx文件到demo目录 - cp PATH/TO/scaledyolov4.onnx PATH/TO/model_zoo/vision/scaledyolov4/ - ``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` - -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` -## Python部署 - -执行如下代码即会自动下载测试图片 -``` -python scaled_yolov4.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -665.666321,390.477173, 810.000000, 879.829346, 0.940627, 0 -48.266064,396.217163, 247.338425, 901.974915, 0.922277, 0 -221.351868,408.446259, 345.524017, 857.927917, 0.910516, 0 -14.989746,228.662842, 801.292236, 735.677490, 0.820487, 5 -0.000000,548.260864, 75.825439, 873.932495, 0.718777, 0 -134.789062,473.950195, 148.526367, 506.777344, 0.513963, 27 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [ScaledYOLOv4 API文档](./api.md) diff --git a/model_zoo/vision/scaledyolov4/api.md b/model_zoo/vision/scaledyolov4/api.md deleted file mode 100644 index e23559229..000000000 --- a/model_zoo/vision/scaledyolov4/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# ScaledYOLOv4 API说明 - -## Python API - -### ScaledYOLOv4类 -``` -fastdeploy.vision.wongkinyiu.ScaledYOLOv4(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -ScaledYOLOv4模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`scaledyolov4.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> ScaledYOLOv4.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[scaled_yolov4.py](./scaled_yolov4.py) - - -## C++ API - -### ScaledYOLOv4类 -``` -fastdeploy::vision::wongkinyiu::ScaledYOLOv4( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -ScaledYOLOv4模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`scaledyolov4.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> ScaledYOLOv4::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/scaledyolov4.cc](cpp/scaledyolov4.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt b/model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt deleted file mode 100644 index 062f4fa5d..000000000 --- a/model_zoo/vision/scaledyolov4/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(scaledyolov4_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(scaledyolov4_demo ${PROJECT_SOURCE_DIR}/scaledyolov4.cc) -# 添加FastDeploy库依赖 -target_link_libraries(scaledyolov4_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/scaledyolov4/cpp/README.md b/model_zoo/vision/scaledyolov4/cpp/README.md deleted file mode 100644 index 7372cc8b9..000000000 --- a/model_zoo/vision/scaledyolov4/cpp/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# 编译ScaledYOLOv4示例 - -当前支持模型版本为:[ScaledYOLOv4 branch yolov4-large](https://github.com/WongKinYiu/ScaledYOLOv4) -## 获取ONNX文件 - -- 手动获取 - - 访问[ScaledYOLOv4](https://github.com/WongKinYiu/ScaledYOLOv4)官方github库,按照指引下载安装,下载`scaledyolov4.pt` 模型,利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现问题,可以参考[ScaledYOLOv4#401](https://github.com/WongKinYiu/ScaledYOLOv4/issues/401)的解决办法 - - ``` - #下载ScaledYOLOv4模型文件 - Download from the goole drive https://drive.google.com/file/d/1aXZZE999sHMP1gev60XhNChtHPRMH3Fz/view?usp=sharing - - # 导出onnx格式文件 - python models/export.py --weights PATH/TO/scaledyolov4-xx-xx-xx.pt --img-size 640 - - # 移动onnx文件到demo目录 - cp PATH/TO/scaledyolov4.onnx PATH/TO/model_zoo/vision/scaledyolov4/ - ``` - - -## 运行demo - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 移动onnx文件到demo目录 -cp PATH/TO/scaledyolov4.onnx PATH/TO/model_zoo/vision/scaledyolov4/cpp/build/ - -# 下载图片 -wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg - -# 执行 -./scaledyolov4_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -665.666321,390.477173, 810.000000, 879.829346, 0.940627, 0 -48.266064,396.217163, 247.338425, 901.974915, 0.922277, 0 -221.351868,408.446259, 345.524017, 857.927917, 0.910516, 0 -14.989746,228.662842, 801.292236, 735.677490, 0.820487, 5 -0.000000,548.260864, 75.825439, 873.932495, 0.718777, 0 -134.789062,473.950195, 148.526367, 506.777344, 0.513963, 27 -``` diff --git a/model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc b/model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc deleted file mode 100644 index 13f9bc0c2..000000000 --- a/model_zoo/vision/scaledyolov4/cpp/scaledyolov4.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::wongkinyiu::ScaledYOLOv4("scaledyolov4.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("bus.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/scaledyolov4/scaled_yolov4.py b/model_zoo/vision/scaledyolov4/scaled_yolov4.py deleted file mode 100644 index 3bcf0fa58..000000000 --- a/model_zoo/vision/scaledyolov4/scaled_yolov4.py +++ /dev/null @@ -1,21 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg" -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.wongkinyiu.ScaledYOLOv4("scaledyolov4.onnx") - -# 预测图片 -im = cv2.imread("bus.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/scrfd/README.md b/model_zoo/vision/scrfd/README.md deleted file mode 100644 index 4424f59a3..000000000 --- a/model_zoo/vision/scrfd/README.md +++ /dev/null @@ -1,92 +0,0 @@ -# 编译SCRFD示例 - -当前支持模型版本为:[SCRFD CID:17cdeab](https://github.com/deepinsight/insightface/tree/17cdeab12a35efcebc2660453a8cbeae96e20950) - -本文档说明如何进行[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)的快速部署推理。本目录结构如下 - -``` -. -├── cpp -│   ├── CMakeLists.txt -│   ├── README.md -│   └── scrfd.cc -├── README.md -└── scrfd.py -``` - -## 获取ONNX文件 - -- 手动获取 - - 访问[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)官方github库,按照指引下载安装,下载`scrfd.pt` 模型,利用 `tools/scrfd2onnx.py` 得到`onnx`格式文件。 - - - - ``` - #下载scrfd模型文件 - e.g. download from https://onedrive.live.com/?authkey=%21ABbFJx2JMhNjhNA&id=4A83B6B633B029CC%215542&cid=4A83B6B633B029CC - - # 安装官方库配置环境,此版本导出环境为: - - 手动配置环境 - torch==1.8.0 - mmcv==1.3.5 - mmdet==2.7.0 - - - 通过docker配置 - docker pull qyjdefdocker/onnx-scrfd-converter:v0.3 - - # 导出onnx格式文件 - - 手动生成 - python tools/scrfd2onnx.py configs/scrfd/scrfd_500m.py weights/scrfd_500m.pth --shape 640 --input-img face-xxx.jpg - - - docker - docker的onnx目录中已有生成好的onnx文件 - - - # 移动onnx文件到demo目录 - cp PATH/TO/SCRFD.onnx PATH/TO/model_zoo/vision/scrfd/ - ``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` - -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` -## Python部署 - -执行如下代码即会自动下载测试图片 -``` -python scrfd.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score] -437.670410,194.262772, 478.729828, 244.633911, 0.912465 -418.303650,118.277687, 455.877838, 169.209564, 0.911748 -269.449493,280.810608, 319.466614, 342.681213, 0.908530 -775.553955,237.509979, 814.626526, 286.252350, 0.901296 -565.155945,303.849670, 608.786255, 356.025726, 0.898307 -411.813477,296.117584, 454.560394, 353.151367, 0.889968 -688.620239,153.063812, 728.825195, 204.860321, 0.888146 -686.523071,304.881104, 732.901245, 364.715088, 0.885789 -194.658829,236.657883, 234.194748, 289.099701, 0.881143 -137.273422,286.025787, 183.479523, 344.614441, 0.877399 -289.256775,148.388992, 326.087769, 197.035645, 0.875090 -182.943939,154.105682, 221.422440, 204.460495, 0.871119 -330.301849,207.786499, 367.546692, 260.813232, 0.869559 -659.884216,254.861847, 701.580017, 307.984711, 0.869249 -550.305359,232.336868, 591.702026, 281.101532, 0.866158 -567.473511,127.402367, 604.959839, 175.831696, 0.858938 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [SCRFD API文档](./api.md) diff --git a/model_zoo/vision/scrfd/api.md b/model_zoo/vision/scrfd/api.md deleted file mode 100644 index 442bd4a25..000000000 --- a/model_zoo/vision/scrfd/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# SCRFD API说明 - -## Python API - -### SCRFD类 -``` -fastdeploy.vision.deepinsight.SCRFD(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -SCRFD模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`SCRFD.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> SCRFD.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[scrfd.py](./scrfd.py) - - -## C++ API - -### SCRFD类 -``` -fastdeploy::vision::deepinsight::SCRFD( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -SCRFD模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`SCRFD.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> SCRFD::Predict(cv::Mat* im, FaceDetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/scrfd.cc](cpp/scrfd.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/scrfd/cpp/CMakeLists.txt b/model_zoo/vision/scrfd/cpp/CMakeLists.txt deleted file mode 100644 index e63971ba1..000000000 --- a/model_zoo/vision/scrfd/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(scrfd_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(scrfd_demo ${PROJECT_SOURCE_DIR}/scrfd.cc) -# 添加FastDeploy库依赖 -target_link_libraries(scrfd_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/scrfd/cpp/README.md b/model_zoo/vision/scrfd/cpp/README.md deleted file mode 100644 index fe2ee64d3..000000000 --- a/model_zoo/vision/scrfd/cpp/README.md +++ /dev/null @@ -1,76 +0,0 @@ -# 编译SCRFD示例 - -当前支持模型版本为:[SCRFD CID:17cdeab](https://github.com/deepinsight/insightface/tree/17cdeab12a35efcebc2660453a8cbeae96e20950) - -本文档说明如何进行[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)的快速部署推理。本目录结构如下 - -## 获取ONNX文件 - -- 手动获取 - - 访问[SCRFD](https://github.com/deepinsight/insightface/tree/master/detection/scrfd)官方github库,按照指引下载安装,下载`scrfd.pt` 模型,利用 `tools/scrfd2onnx.py` 得到`onnx`格式文件。 - - - ``` - #下载scrfd模型文件 - e.g. download from https://onedrive.live.com/?authkey=%21ABbFJx2JMhNjhNA&id=4A83B6B633B029CC%215542&cid=4A83B6B633B029CC - - # 安装官方库配置环境,此版本导出环境为: - - 手动配置环境 - torch==1.8.0 - mmcv==1.3.5 - mmdet==2.7.0 - - - 通过docker配置 - docker pull qyjdefdocker/onnx-scrfd-converter:v0.3 - - # 导出onnx格式文件 - - 手动生成 - python tools/scrfd2onnx.py configs/scrfd/scrfd_500m.py weights/scrfd_500m.pth --shape 640 --input-img face-xxx.jpg - - - docker - docker的onnx目录中已有生成好的onnx文件 - - -## 运行demo - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 移动onnx文件到demo目录 -cp PATH/TO/SCRFD.onnx PATH/TO/model_zoo/vision/scrfd/cpp/build/ - -# 下载图片 -wget https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg - -# 执行 -./scrfd_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score] -437.670410,194.262772, 478.729828, 244.633911, 0.912465 -418.303650,118.277687, 455.877838, 169.209564, 0.911748 -269.449493,280.810608, 319.466614, 342.681213, 0.908530 -775.553955,237.509979, 814.626526, 286.252350, 0.901296 -565.155945,303.849670, 608.786255, 356.025726, 0.898307 -411.813477,296.117584, 454.560394, 353.151367, 0.889968 -688.620239,153.063812, 728.825195, 204.860321, 0.888146 -686.523071,304.881104, 732.901245, 364.715088, 0.885789 -194.658829,236.657883, 234.194748, 289.099701, 0.881143 -137.273422,286.025787, 183.479523, 344.614441, 0.877399 -289.256775,148.388992, 326.087769, 197.035645, 0.875090 -182.943939,154.105682, 221.422440, 204.460495, 0.871119 -330.301849,207.786499, 367.546692, 260.813232, 0.869559 -659.884216,254.861847, 701.580017, 307.984711, 0.869249 -550.305359,232.336868, 591.702026, 281.101532, 0.866158 -567.473511,127.402367, 604.959839, 175.831696, 0.858938 -``` diff --git a/model_zoo/vision/scrfd/cpp/scrfd.cc b/model_zoo/vision/scrfd/cpp/scrfd.cc deleted file mode 100644 index 72dbeb4c7..000000000 --- a/model_zoo/vision/scrfd/cpp/scrfd.cc +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::deepinsight::SCRFD("SCRFD.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("test_lite_face_detector_3.jpg"); - cv::Mat vis_im = im.clone(); - - // 如果导入不带有关键点预测的模型,请修改模型参数 use_kps 和 landmarks_per_face,示例如下 - // model.landmarks_per_face = 0; - // model.use_kps = false; - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/scrfd/scrfd.py b/model_zoo/vision/scrfd/scrfd.py deleted file mode 100644 index 1d4ae8c76..000000000 --- a/model_zoo/vision/scrfd/scrfd.py +++ /dev/null @@ -1,25 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -test_jpg_url = "https://raw.githubusercontent.com/DefTruth/lite.ai.toolkit/main/examples/lite/resources/test_lite_face_detector_3.jpg" -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.deepinsight.SCRFD("SCRFD.onnx") - -# 如果导入不带有关键点预测的模型,请修改模型参数 use_kps 和 landmarks_per_face,示例如下 -# model.use_kps = False -# model.landmarks_per_face = 0 - -# 预测图片 -im = cv2.imread("test_lite_face_detector_3.jpg") -result = model.predict(im, conf_threshold=0.5, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_face_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/ultraface/README.md b/model_zoo/vision/ultraface/README.md deleted file mode 100644 index 264f1b5cb..000000000 --- a/model_zoo/vision/ultraface/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# UltraFace部署示例 - -当前支持模型版本为:[UltraFace CommitID:dffdddd](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/commit/dffdddd) - -本文档说明如何进行[UltraFace](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/)的快速部署推理。本目录结构如下 - -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── ultraface.cc # C++ 示例代码 -├── api.md # API 说明文档 -├── README.md # UltraFace 部署文档 -└── ultraface.py # Python示例代码 -``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -```bash -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载UltraFace模型和测试图片 -```bash -python ultraface.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score] -742.528931,261.309937, 837.749146, 365.145599, 0.999833 -408.159332,253.410889, 484.747284, 353.378052, 0.999832 -549.409424,225.051819, 636.311890, 337.824707, 0.999782 -185.562805,233.364044, 252.001801, 323.948669, 0.999709 -304.065918,180.468140, 377.097961, 278.932861, 0.999645 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [UltraFace API文档](./api.md) diff --git a/model_zoo/vision/ultraface/api.md b/model_zoo/vision/ultraface/api.md deleted file mode 100644 index 8dc7d2fb7..000000000 --- a/model_zoo/vision/ultraface/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# UltraFace API说明 - -## Python API - -### UltraFace类 -``` -fastdeploy.vision.linzaer.UltraFace(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -UltraFace模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`version-RFB-320.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> UltraFace.predict(image_data, conf_threshold=0.7, nms_iou_threshold=0.3) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[ultraface.py](./ultraface.py) - - -## C++ API - -### UltraFace类 -``` -fastdeploy::vision::linzaer::UltraFace( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -UltraFace模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`version-RFB-320.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> UltraFace::Predict(cv::Mat* im, FaceDetectionResult* result, -> float conf_threshold = 0.7, -> float nms_iou_threshold = 0.3) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/ultraface.cc](cpp/ultraface.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/ultraface/cpp/CMakeLists.txt b/model_zoo/vision/ultraface/cpp/CMakeLists.txt deleted file mode 100644 index a33967dee..000000000 --- a/model_zoo/vision/ultraface/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(ultraface_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(ultraface_demo ${PROJECT_SOURCE_DIR}/ultraface.cc) -# 添加FastDeploy库依赖 -target_link_libraries(ultraface_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/ultraface/cpp/README.md b/model_zoo/vision/ultraface/cpp/README.md deleted file mode 100644 index d2098d838..000000000 --- a/model_zoo/vision/ultraface/cpp/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# 编译UltraFace示例 - -当前支持模型版本为:[UltraFace CommitID:dffdddd](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/commit/dffdddd) - -## 下载和解压预测库 -```bash -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz -``` - -## 编译示例代码 -```bash -mkdir build & cd build -cmake .. -make -j -``` - -## 下载模型和图片 -wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/models/onnx/version-RFB-320.onnx -wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg - - -## 执行 -```bash -./ultraface_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score] -742.528931,261.309937, 837.749146, 365.145599, 0.999833 -408.159332,253.410889, 484.747284, 353.378052, 0.999832 -549.409424,225.051819, 636.311890, 337.824707, 0.999782 -185.562805,233.364044, 252.001801, 323.948669, 0.999709 -304.065918,180.468140, 377.097961, 278.932861, 0.999645 -``` diff --git a/model_zoo/vision/ultraface/cpp/ultraface.cc b/model_zoo/vision/ultraface/cpp/ultraface.cc deleted file mode 100644 index 9f1aa8a9b..000000000 --- a/model_zoo/vision/ultraface/cpp/ultraface.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - - auto model = vis::linzaer::UltraFace("version-RFB-320.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed! Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "Init Done! Model:" << model_file << std::endl; - } - model.EnableDebug(); - - cv::Mat im = cv::imread("3.jpg"); - cv::Mat vis_im = im.clone(); - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.7f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } else { - std::cout << "Prediction Done!" << std::endl; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Detect Done! Saved: " << vis_path << std::endl; - return 0; -} diff --git a/model_zoo/vision/ultraface/ultraface.py b/model_zoo/vision/ultraface/ultraface.py deleted file mode 100644 index ceb4c313f..000000000 --- a/model_zoo/vision/ultraface/ultraface.py +++ /dev/null @@ -1,23 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型 -model_url = "https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/models/onnx/version-RFB-320.onnx" -test_img_url = "https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/imgs/3.jpg" -fd.download(model_url, ".", show_progress=True) -fd.download(test_img_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.linzaer.UltraFace("version-RFB-320.onnx") - -# 预测图片 -im = cv2.imread("3.jpg") -result = model.predict(im, conf_threshold=0.7, nms_iou_threshold=0.3) - -# 可视化结果 -fd.vision.visualize.vis_face_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolor/README.md b/model_zoo/vision/yolor/README.md deleted file mode 100644 index 358e62bbe..000000000 --- a/model_zoo/vision/yolor/README.md +++ /dev/null @@ -1,66 +0,0 @@ -# 编译YOLOR示例 - -当前支持模型版本为:[YOLOR weights](https://github.com/WongKinYiu/yolor/releases/tag/weights) -(tips: 如果使用 `git clone` 的方式下载仓库代码,请将分支切换(checkout)到 `paper` 分支). - -本文档说明如何进行[YOLOR](https://github.com/WongKinYiu/yolor)的快速部署推理。本目录结构如下 - -``` -. -├── cpp -│   ├── CMakeLists.txt -│   ├── README.md -│   └── yolor.cc -├── README.md -└── yolor.py -``` - -## 获取ONNX文件 - -- 手动获取 - - 访问[YOLOR](https://github.com/WongKinYiu/yolor)官方github库,按照指引下载安装,下载`yolor.pt` 模型,利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现精度不达标或者是数据维度的问题,可以参考[yolor#32](https://github.com/WongKinYiu/yolor/issues/32)的解决办法 - - ``` - #下载yolor模型文件 - wget https://github.com/WongKinYiu/yolor/releases/download/weights/yolor-d6-paper-570.pt - - # 导出onnx格式文件 - python models/export.py --weights PATH/TO/yolor-xx-xx-xx.pt --img-size 640 - - # 移动onnx文件到demo目录 - cp PATH/TO/yolor.onnx PATH/TO/model_zoo/vision/yolor/ - ``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` - -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` -## Python部署 - -执行如下代码即会自动下载测试图片 -``` -python yolor.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -0.000000,185.201431, 315.673126, 410.071594, 0.959289, 17 -433.802826,211.603455, 595.489319, 346.425537, 0.952615, 17 -230.446854,195.618805, 418.365479, 362.712128, 0.884253, 17 -336.545624,208.555618, 457.704315, 323.543152, 0.788450, 17 -0.896423,183.936996, 154.788727, 304.916412, 0.672804, 17 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOR API文档](./api.md) diff --git a/model_zoo/vision/yolor/api.md b/model_zoo/vision/yolor/api.md deleted file mode 100644 index b1e5be889..000000000 --- a/model_zoo/vision/yolor/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOR API说明 - -## Python API - -### YOLOR类 -``` -fastdeploy.vision.wongkinyiu.YOLOR(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOR模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolor.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOR.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolor.py](./yolor.py) - - -## C++ API - -### YOLOR类 -``` -fastdeploy::vision::wongkinyiu::YOLOR( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOR模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolor.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOR::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolor.cc](cpp/yolor.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolor/cpp/CMakeLists.txt b/model_zoo/vision/yolor/cpp/CMakeLists.txt deleted file mode 100644 index 18248b845..000000000 --- a/model_zoo/vision/yolor/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(yolor_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolor_demo ${PROJECT_SOURCE_DIR}/yolor.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolor_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/yolor/cpp/README.md b/model_zoo/vision/yolor/cpp/README.md deleted file mode 100644 index d06bbe300..000000000 --- a/model_zoo/vision/yolor/cpp/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# 编译YOLOR示例 - -当前支持模型版本为:[YOLOR weights](https://github.com/WongKinYiu/yolor/releases/tag/weights) -(tips: 如果使用 `git clone` 的方式下载仓库代码,请将分支切换(checkout)到 `paper` 分支). -## 获取ONNX文件 - -- 手动获取 - - 访问[YOLOR](https://github.com/WongKinYiu/yolor)官方github库,按照指引下载安装,下载`yolor.pt` 模型,利用 `models/export.py` 得到`onnx`格式文件。如果您导出的`onnx`模型出现精度不达标或者是数据维度的问题,可以参考[yolor#32](https://github.com/WongKinYiu/yolor/issues/32)的解决办法 - - ``` - #下载yolor模型文件 - wget https://github.com/WongKinYiu/yolor/releases/download/weights/yolor-d6-paper-570.pt - - # 导出onnx格式文件 - python models/export.py --weights PATH/TO/yolor-xx-xx-xx.pt --img-size 640 - - # 移动onnx文件到demo目录 - cp PATH/TO/yolor.onnx PATH/TO/model_zoo/vision/yolor/ - ``` - - -## 运行demo - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 移动onnx文件到demo目录 -cp PATH/TO/yolor.onnx PATH/TO/model_zoo/vision/yolor/cpp/build/ - -# 下载图片 -wget https://raw.githubusercontent.com/WongKinYiu/yolor/paper/inference/images/horses.jpg - -# 执行 -./yolor_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -0.000000,185.201431, 315.673126, 410.071594, 0.959289, 17 -433.802826,211.603455, 595.489319, 346.425537, 0.952615, 17 -230.446854,195.618805, 418.365479, 362.712128, 0.884253, 17 -336.545624,208.555618, 457.704315, 323.543152, 0.788450, 17 -0.896423,183.936996, 154.788727, 304.916412, 0.672804, 17 -``` diff --git a/model_zoo/vision/yolor/cpp/yolor.cc b/model_zoo/vision/yolor/cpp/yolor.cc deleted file mode 100644 index db194583f..000000000 --- a/model_zoo/vision/yolor/cpp/yolor.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::wongkinyiu::YOLOR("yolor.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("horses.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolor/yolor.py b/model_zoo/vision/yolor/yolor.py deleted file mode 100644 index 56d3f9689..000000000 --- a/model_zoo/vision/yolor/yolor.py +++ /dev/null @@ -1,21 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -test_jpg_url = "https://raw.githubusercontent.com/WongKinYiu/yolor/paper/inference/images/horses.jpg" -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.wongkinyiu.YOLOR("yolor.onnx") - -# 预测图片 -im = cv2.imread("horses.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolov5/README.md b/model_zoo/vision/yolov5/README.md deleted file mode 100644 index 03b19d44c..000000000 --- a/model_zoo/vision/yolov5/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# YOLOv5部署示例 - -当前支持模型版本为:[YOLOv5 v6.0](https://github.com/ultralytics/yolov5/releases/download/v6.0) - -本文档说明如何进行[YOLOv5](https://github.com/ultralytics/yolov5)的快速部署推理。本目录结构如下 -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── yolov5.cc # C++ 示例代码 -├── README.md # YOLOv5 部署文档 -└── yolov5.py # Python示例代码 -``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载YOLOv5模型和测试图片 -``` -python yolov5.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -223.395142,403.948669, 345.337189, 867.339050, 0.856906, 0 -668.301758,400.781342, 808.441772, 882.534973, 0.829716, 0 -50.210720,398.571411, 243.123367, 905.016602, 0.805375, 0 -23.768242,214.979370, 802.627686, 778.840881, 0.756311, 5 -0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOv5 API文档](./api.md) diff --git a/model_zoo/vision/yolov5/api.md b/model_zoo/vision/yolov5/api.md deleted file mode 100644 index 66d6acdc7..000000000 --- a/model_zoo/vision/yolov5/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOv5 API说明 - -## Python API - -### YOLOv5类 -``` -fastdeploy.vision.ultralytics.YOLOv5(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOv5模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolov5s.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOv5.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolov5.py](./yolov5.py) - - -## C++ API - -### YOLOv5类 -``` -fastdeploy::vision::ultralytics::YOLOv5( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOv5模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolov5s.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOv5::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolov5.cc](cpp/yolov5.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolov5/cpp/CMakeLists.txt b/model_zoo/vision/yolov5/cpp/CMakeLists.txt deleted file mode 100644 index c1f82a6fe..000000000 --- a/model_zoo/vision/yolov5/cpp/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -PROJECT(yolov5_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0) - - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolov5_demo ${PROJECT_SOURCE_DIR}/yolov5.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolov5_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/yolov5/cpp/README.md b/model_zoo/vision/yolov5/cpp/README.md deleted file mode 100644 index a1f1bde49..000000000 --- a/model_zoo/vision/yolov5/cpp/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# 编译YOLOv5示例 - -当前支持模型版本为:[YOLOv5 v6.0](https://github.com/ultralytics/yolov5/releases/download/v6.0) - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 下载模型和图片 -wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx -wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg - -# 执行 -./yolov5_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -223.395142,403.948669, 345.337189, 867.339050, 0.856906, 0 -668.301758,400.781342, 808.441772, 882.534973, 0.829716, 0 -50.210720,398.571411, 243.123367, 905.016602, 0.805375, 0 -23.768242,214.979370, 802.627686, 778.840881, 0.756311, 5 -0.737200,552.281006, 78.617218, 890.945007, 0.363471, 0 -``` diff --git a/model_zoo/vision/yolov5/cpp/yolov5.cc b/model_zoo/vision/yolov5/cpp/yolov5.cc deleted file mode 100644 index dddcee843..000000000 --- a/model_zoo/vision/yolov5/cpp/yolov5.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::ultralytics::YOLOv5("yolov5s.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("bus.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolov5/yolov5.py b/model_zoo/vision/yolov5/yolov5.py deleted file mode 100644 index c502c6636..000000000 --- a/model_zoo/vision/yolov5/yolov5.py +++ /dev/null @@ -1,23 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -model_url = "https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5s.onnx" -test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg" -fd.download(model_url, ".", show_progress=True) -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.ultralytics.YOLOv5("yolov5s.onnx") - -# 预测图片 -im = cv2.imread("bus.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolov5face/README.md b/model_zoo/vision/yolov5face/README.md deleted file mode 100644 index e1713e67d..000000000 --- a/model_zoo/vision/yolov5face/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# YOLOv5Face部署示例 - -当前支持模型版本为:[YOLOv5Face CommitID:4fd1ead](https://github.com/deepcam-cn/yolov5-face/commit/4fd1ead) - -本文档说明如何进行[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)的快速部署推理。本目录结构如下 - -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── yolov5face.cc # C++ 示例代码 -├── api.md # API 说明文档 -├── README.md # YOLOv5Face 部署文档 -└── yolov5face.py # Python示例代码 -``` - -## 获取ONNX文件 - -访问[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)官方github库,按照指引下载安装,下载`yolov5s-face.pt` 模型,利用 `export.py` 得到`onnx`格式文件。 - -* 下载yolov5face模型文件 - ``` - Link: https://pan.baidu.com/s/1fyzLxZYx7Ja1_PCIWRhxbw Link: eq0q - https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing - ``` - -* 导出onnx格式文件 - ```bash - PYTHONPATH=. python export.py --weights weights/yolov5s-face.pt --img_size 640 640 --batch_size 1 - ``` -* onnx模型简化(可选) - ```bash - onnxsim yolov5s-face.onnx yolov5s-face.onnx - ``` -* 移动onnx文件到model_zoo/yolov5face的目录 - ```bash - cp PATH/TO/yolov5s-face.onnx PATH/TO/model_zoo/vision/yolov5face/ - ``` - - - -## 准备测试图片 -准备一张包含人脸的测试图片,命名为test.jpg,并拷贝到可执行文件所在的目录 - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -```bash -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载YOLOv5Face模型和测试图片 -```bash -python yolov5face.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5] -749.575256,375.122162, 775.008850, 407.858215, 0.851824, (756.933838,388.423157), (767.810974,387.932922), (762.617065,394.212341), (758.053101,399.073639), (767.370300,398.769470) -897.833862,380.372864, 924.725281, 409.566803, 0.847505, (903.757202,390.221741), (914.575867,389.495911), (908.998901,395.983307), (905.803223,400.871429), (914.674438,400.268066) -281.558197,367.739349, 305.474701, 397.860535, 0.840915, (287.018768,379.771088), (297.285004,378.755280), (292.057831,385.207367), (289.110962,390.010437), (297.535339,389.412048) -132.922104,368.507263, 159.098541, 402.777283, 0.840232, (140.632492,382.361633), (151.900864,380.966156), (146.869186,388.505066), (141.930420,393.724670), (151.734604,392.808197) -699.379700,306.743256, 723.219421, 336.533295, 0.840228, (705.688843,319.133301), (715.784668,318.449524), (711.107300,324.416016), (707.236633,328.671936), (716.088623,328.151794) -# ... -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOv5Face API文档](./api.md) diff --git a/model_zoo/vision/yolov5face/api.md b/model_zoo/vision/yolov5face/api.md deleted file mode 100644 index ea32820f6..000000000 --- a/model_zoo/vision/yolov5face/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOv5Face API说明 - -## Python API - -### YOLOv5Face类 -``` -fastdeploy.vision.deepcam.YOLOv5Face(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOv5Face模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolov5s-face.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOv5Face.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolov5face.py](./yolov5face.py) - - -## C++ API - -### YOLOv5Face类 -``` -fastdeploy::vision::deepcam::YOLOv5Face( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOv5Face模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolov5s-face.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOv5Face::Predict(cv::Mat* im, FaceDetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolov5face.cc](cpp/yolov5face.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolov5face/cpp/CMakeLists.txt b/model_zoo/vision/yolov5face/cpp/CMakeLists.txt deleted file mode 100644 index 23878ac2c..000000000 --- a/model_zoo/vision/yolov5face/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(yolov5face_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolov5face_demo ${PROJECT_SOURCE_DIR}/yolov5face.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolov5face_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/yolov5face/cpp/README.md b/model_zoo/vision/yolov5face/cpp/README.md deleted file mode 100644 index 60d46cb87..000000000 --- a/model_zoo/vision/yolov5face/cpp/README.md +++ /dev/null @@ -1,60 +0,0 @@ -# 编译YOLOv5Face示例 - -当前支持模型版本为:[YOLOv5Face CommitID:4fd1ead](https://github.com/deepcam-cn/yolov5-face/commit/4fd1ead) - -## 下载和解压预测库 -```bash -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz -``` - -## 编译示例代码 -```bash -mkdir build & cd build -cmake .. -make -j -``` - -## 获取ONNX文件 - -访问[YOLOv5Face](https://github.com/deepcam-cn/yolov5-face)官方github库,按照指引下载安装,下载`yolov5s-face.pt` 模型,利用 `export.py` 得到`onnx`格式文件。 - -* 下载yolov5face模型文件 - ``` - Link: https://pan.baidu.com/s/1fyzLxZYx7Ja1_PCIWRhxbw Link: eq0q - https://drive.google.com/file/d/1zxaHeLDyID9YU4-hqK7KNepXIwbTkRIO/view?usp=sharing - ``` - -* 导出onnx格式文件 - ```bash - PYTHONPATH=. python export.py --weights weights/yolov5s-face.pt --img_size 640 640 --batch_size 1 - ``` -* onnx模型简化(可选) - ```bash - onnxsim yolov5s-face.onnx yolov5s-face.onnx - ``` -* 移动onnx文件到可执行文件的目录 - ```bash - cp PATH/TO/yolov5s-face.onnx PATH/TO/model_zoo/vision/yolov5face/cpp/build - ``` - - - -## 准备测试图片 -准备一张包含人脸的测试图片,命名为test.jpg,并拷贝到可执行文件所在的目录 - -## 执行 -```bash -./yolov5face_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -FaceDetectionResult: [xmin, ymin, xmax, ymax, score, (x, y) x 5] -749.575256,375.122162, 775.008850, 407.858215, 0.851824, (756.933838,388.423157), (767.810974,387.932922), (762.617065,394.212341), (758.053101,399.073639), (767.370300,398.769470) -897.833862,380.372864, 924.725281, 409.566803, 0.847505, (903.757202,390.221741), (914.575867,389.495911), (908.998901,395.983307), (905.803223,400.871429), (914.674438,400.268066) -281.558197,367.739349, 305.474701, 397.860535, 0.840915, (287.018768,379.771088), (297.285004,378.755280), (292.057831,385.207367), (289.110962,390.010437), (297.535339,389.412048) -132.922104,368.507263, 159.098541, 402.777283, 0.840232, (140.632492,382.361633), (151.900864,380.966156), (146.869186,388.505066), (141.930420,393.724670), (151.734604,392.808197) -699.379700,306.743256, 723.219421, 336.533295, 0.840228, (705.688843,319.133301), (715.784668,318.449524), (711.107300,324.416016), (707.236633,328.671936), (716.088623,328.151794) -# ... -``` diff --git a/model_zoo/vision/yolov5face/cpp/yolov5face.cc b/model_zoo/vision/yolov5face/cpp/yolov5face.cc deleted file mode 100644 index baa0bb7c0..000000000 --- a/model_zoo/vision/yolov5face/cpp/yolov5face.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::deepcam::YOLOv5Face("yolov5s-face.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("test.jpg"); - cv::Mat vis_im = im.clone(); - - vis::FaceDetectionResult res; - if (!model.Predict(&im, &res, 0.1f, 0.3f)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisFaceDetection(&vis_im, res, 2, 0.3f); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolov5face/yolov5face.py b/model_zoo/vision/yolov5face/yolov5face.py deleted file mode 100644 index ff7ab1b77..000000000 --- a/model_zoo/vision/yolov5face/yolov5face.py +++ /dev/null @@ -1,17 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 加载模型 -model = fd.vision.deepcam.YOLOv5Face("yolov5s-face.onnx") - -# 预测图片 -im = cv2.imread("test.jpg") -result = model.predict(im, conf_threshold=0.1, nms_iou_threshold=0.3) - -# 可视化结果 -fd.vision.visualize.vis_face_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolov5lite/README.md b/model_zoo/vision/yolov5lite/README.md deleted file mode 100644 index 22c726e85..000000000 --- a/model_zoo/vision/yolov5lite/README.md +++ /dev/null @@ -1,130 +0,0 @@ -# 编译YOLOv5Lite示例 - -当前支持模型版本为:[YOLOv5-Lite-v1.4](https://github.com/ppogg/YOLOv5-Lite/releases/tag/v1.4) - -本文档说明如何进行[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite)的快速部署推理。本目录结构如下 - -``` -. -├── cpp -│   ├── CMakeLists.txt -│   ├── README.md -│   └── yolov5lite.cc -├── README.md -└── yolov5lite.py -``` - -## 获取ONNX文件 -- 自动获取 - 访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite) -官方github库,按照指引下载安装,下载`yolov5-lite-xx.onnx` 模型(Tips:官方提供的ONNX文件目前是没有decode模块的) - ``` - #下载yolov5-lite模型文件(.onnx) - Download from https://drive.google.com/file/d/1bJByk9eoS6pv8Z3N4bcLRCV3i7uk24aU/view - 官方Repo也支持百度云下载 - ``` - -- 手动获取 - - 访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite) -官方github库,按照指引下载安装,下载`yolov5-lite-xx.pt` 模型,利用 `export.py` 得到`onnx`格式文件。 - - - 导出含有decode模块的ONNX文件 - - 首先需要参考[YOLOv5-Lite#189](https://github.com/ppogg/YOLOv5-Lite/pull/189)的解决办法,修改代码。 - - ``` - #下载yolov5-lite模型文件(.pt) - Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view - 官方Repo也支持百度云下载 - - # 导出onnx格式文件 - python export.py --grid --dynamic --concat --weights PATH/TO/yolov5-lite-xx.pt - - # 移动onnx文件到demo目录 - cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/ - ``` - - 导出无decode模块的ONNX文件(不需要修改代码) - - ``` - #下载yolov5-lite模型文件 - Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view - 官方Repo也支持百度云下载 - - # 导出onnx格式文件 - python export.py --grid --dynamic --weights PATH/TO/yolov5-lite-xx.pt - - # 移动onnx文件到demo目录 - cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/ - ``` -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` - -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## 设置ONNX文件处理方式 - -如果ONNX文件是含有decode模块的,设置`model.is_decode_exported = True`(解除yolov5lite.py第12行注释) - -如果ONNX文件是无decode模块的,不用做任何处理,默认是`model.is_decode_exported = False` - -## Python部署 - -执行如下代码即会自动下载测试图片 -``` -python yolov5lite.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -1289.729126,698.414612, 1404.110229, 1023.949524, 0.893141, 0 -300.958649,1027.166992, 449.921753, 1299.823608, 0.887509, 0 -627.481201,823.830750, 718.942078, 1133.402344, 0.885308, 0 -152.969437,1147.352905, 257.228424, 1301.652710, 0.877009, 0 -512.867188,773.371094, 649.768494, 1123.529785, 0.870583, 0 -906.801147,508.160278, 997.325867, 825.934509, 0.867381, 0 -307.480988,87.785973, 408.681732, 387.337463, 0.860646, 0 -783.116821,492.420319, 871.741028, 774.283691, 0.851574, 0 -1347.626343,190.911758, 1452.582031, 459.044617, 0.837095, 0 -3.035009,3.509769, 97.237442, 257.884094, 0.835671, 0 -261.199738,303.971527, 371.036041, 569.222595, 0.834187, 0 -1170.358032,722.587219, 1284.564087, 1036.034302, 0.833685, 0 -660.728333,476.764618, 760.990723, 783.636414, 0.823469, 0 -777.628906,815.975098, 886.895935, 1115.206421, 0.820669, 0 -415.902740,983.790283, 543.582764, 1300.361206, 0.791539, 0 -132.273209,40.751694, 210.614563, 285.128174, 0.790815, 0 -1331.930664,370.903687, 1446.262573, 638.119202, 0.773755, 0 -1254.425293,31.073910, 1352.297241, 312.583282, 0.743923, 0 -915.965088,310.556458, 1031.921265, 624.672302, 0.696823, 0 -499.573517,362.165588, 595.503296, 624.872070, 0.678821, 0 -956.890747,76.389160, 1068.599609, 340.183533, 0.656648, 0 -452.388977,320.288269, 532.330688, 593.987915, 0.652459, 0 -488.305664,1028.187012, 565.136719, 1179.688477, 0.629574, 24 -855.175781,868.482422, 916.516113, 988.196777, 0.555574, 26 -1321.689453,1.638852, 1384.584961, 99.413322, 0.504122, 0 -845.324707,531.824768, 875.939941, 614.515198, 0.472173, 26 -1342.546387,2.096432, 1420.351929, 98.888016, 0.463313, 0 -990.747070,635.389221, 1018.249512, 695.264709, 0.444000, 26 -956.799316,120.643112, 1015.100098, 242.920944, 0.442043, 26 -560.449219,401.270538, 607.763672, 522.486389, 0.434484, 26 -1329.199219,372.522980, 1443.054199, 635.315979, 0.399014, 26 -956.140137,88.526413, 1047.509766, 305.213409, 0.367863, 26 -1379.296875,852.808594, 1406.909180, 916.456055, 0.366000, 26 -1331.909180,468.433624, 1369.299316, 532.044495, 0.352329, 26 -864.880371,915.723633, 916.223145, 990.979980, 0.325205, 26 -260.778809,341.724640, 322.229004, 442.432648, 0.320724, 24 -1271.154785,77.393600, 1336.230469, 186.194870, 0.307823, 26 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOv5Lite API文档](./api.md) diff --git a/model_zoo/vision/yolov5lite/api.md b/model_zoo/vision/yolov5lite/api.md deleted file mode 100644 index 38cd87725..000000000 --- a/model_zoo/vision/yolov5lite/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOv5Lite API说明 - -## Python API - -### YOLOv5Lite类 -``` -fastdeploy.vision.ppogg.YOLOv5Lite(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOv5Lite模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolov5lite.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOv5Lite.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolov5_lite.py](./yolov5_lite.py) - - -## C++ API - -### YOLOv5Lite类 -``` -fastdeploy::vision::ppogg::YOLOv5Lite( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOv5Lite模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolov5lite.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOv5Lite::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolov5lite.cc](cpp/yolov5lite.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolov5lite/cpp/CMakeLists.txt b/model_zoo/vision/yolov5lite/cpp/CMakeLists.txt deleted file mode 100644 index 855076a08..000000000 --- a/model_zoo/vision/yolov5lite/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(yolov5lite_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolov5lite_demo ${PROJECT_SOURCE_DIR}/yolov5lite.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolov5lite_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/yolov5lite/cpp/README.md b/model_zoo/vision/yolov5lite/cpp/README.md deleted file mode 100644 index 495fb8145..000000000 --- a/model_zoo/vision/yolov5lite/cpp/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# 编译YOLOv5Lite示例 - -当前支持模型版本为:[YOLOv5-Lite-v1.4](https://github.com/ppogg/YOLOv5-Lite/releases/tag/v1.4) - -## 获取ONNX文件 -- 自动获取 - 访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite) -官方github库,按照指引下载安装,下载`yolov5-lite-xx.onnx` 模型(Tips:官方提供的ONNX文件目前是没有decode模块的) - ``` - #下载yolov5-lite模型文件(.onnx) - Download from https://drive.google.com/file/d/1bJByk9eoS6pv8Z3N4bcLRCV3i7uk24aU/view - 官方Repo也支持百度云下载 - ``` - -- 手动获取 - - 访问[YOLOv5Lite](https://github.com/ppogg/YOLOv5-Lite) -官方github库,按照指引下载安装,下载`yolov5-lite-xx.pt` 模型,利用 `export.py` 得到`onnx`格式文件。 - - - 导出含有decode模块的ONNX文件 - - 首先需要参考[YOLOv5-Lite#189](https://github.com/ppogg/YOLOv5-Lite/pull/189)的解决办法,修改代码。 - - ``` - #下载yolov5-lite模型文件(.pt) - Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view - 官方Repo也支持百度云下载 - - # 导出onnx格式文件 - python export.py --grid --dynamic --concat --weights PATH/TO/yolov5-lite-xx.pt - - # 移动onnx文件到demo目录 - cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/ - ``` - - 导出无decode模块的ONNX文件(不需要修改代码) - - ``` - #下载yolov5-lite模型文件 - Download from https://drive.google.com/file/d/1oftzqOREGqDCerf7DtD5BZp9YWELlkMe/view - 官方Repo也支持百度云下载 - - # 导出onnx格式文件 - python export.py --grid --dynamic --weights PATH/TO/yolov5-lite-xx.pt - - # 移动onnx文件到demo目录 - cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/ - ``` - -## 设置ONNX文件处理方式 - -如果ONNX文件是含有decode模块的,设置`model.is_decode_exported = true`(解除yolov5lite.cc第27行注释) - -如果ONNX文件是无decode模块的,不用做任何处理,默认是`model.is_decode_exported = false` - -## 运行demo - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 移动onnx文件到demo目录 -cp PATH/TO/yolov5lite.onnx PATH/TO/model_zoo/vision/yolov5lite/cpp/build/ - -# 下载图片 -wget https://raw.githubusercontent.com/ppogg/YOLOv5-Lite/master/cpp_demo/mnn/test.jpg - -# 执行 -./yolov5lite_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -1289.729126,698.414612, 1404.110229, 1023.949524, 0.893141, 0 -300.958649,1027.166992, 449.921753, 1299.823608, 0.887509, 0 -627.481201,823.830750, 718.942078, 1133.402344, 0.885308, 0 -152.969437,1147.352905, 257.228424, 1301.652710, 0.877009, 0 -512.867188,773.371094, 649.768494, 1123.529785, 0.870583, 0 -906.801147,508.160278, 997.325867, 825.934509, 0.867381, 0 -307.480988,87.785973, 408.681732, 387.337463, 0.860646, 0 -783.116821,492.420319, 871.741028, 774.283691, 0.851574, 0 -1347.626343,190.911758, 1452.582031, 459.044617, 0.837095, 0 -3.035009,3.509769, 97.237442, 257.884094, 0.835671, 0 -261.199738,303.971527, 371.036041, 569.222595, 0.834187, 0 -1170.358032,722.587219, 1284.564087, 1036.034302, 0.833685, 0 -660.728333,476.764618, 760.990723, 783.636414, 0.823469, 0 -777.628906,815.975098, 886.895935, 1115.206421, 0.820669, 0 -415.902740,983.790283, 543.582764, 1300.361206, 0.791539, 0 -132.273209,40.751694, 210.614563, 285.128174, 0.790815, 0 -1331.930664,370.903687, 1446.262573, 638.119202, 0.773755, 0 -1254.425293,31.073910, 1352.297241, 312.583282, 0.743923, 0 -915.965088,310.556458, 1031.921265, 624.672302, 0.696823, 0 -499.573517,362.165588, 595.503296, 624.872070, 0.678821, 0 -956.890747,76.389160, 1068.599609, 340.183533, 0.656648, 0 -452.388977,320.288269, 532.330688, 593.987915, 0.652459, 0 -488.305664,1028.187012, 565.136719, 1179.688477, 0.629574, 24 -855.175781,868.482422, 916.516113, 988.196777, 0.555574, 26 -1321.689453,1.638852, 1384.584961, 99.413322, 0.504122, 0 -845.324707,531.824768, 875.939941, 614.515198, 0.472173, 26 -1342.546387,2.096432, 1420.351929, 98.888016, 0.463313, 0 -990.747070,635.389221, 1018.249512, 695.264709, 0.444000, 26 -956.799316,120.643112, 1015.100098, 242.920944, 0.442043, 26 -560.449219,401.270538, 607.763672, 522.486389, 0.434484, 26 -1329.199219,372.522980, 1443.054199, 635.315979, 0.399014, 26 -956.140137,88.526413, 1047.509766, 305.213409, 0.367863, 26 -1379.296875,852.808594, 1406.909180, 916.456055, 0.366000, 26 -1331.909180,468.433624, 1369.299316, 532.044495, 0.352329, 26 -864.880371,915.723633, 916.223145, 990.979980, 0.325205, 26 -260.778809,341.724640, 322.229004, 442.432648, 0.320724, 24 -1271.154785,77.393600, 1336.230469, 186.194870, 0.307823, 26 -``` diff --git a/model_zoo/vision/yolov5lite/cpp/yolov5lite.cc b/model_zoo/vision/yolov5lite/cpp/yolov5lite.cc deleted file mode 100644 index 206143f52..000000000 --- a/model_zoo/vision/yolov5lite/cpp/yolov5lite.cc +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::ppogg::YOLOv5Lite("yolov5lite.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("test.jpg"); - cv::Mat vis_im = im.clone(); - // 如果onnx是有decode模块的,需要修改参数 - // model.is_decode_exported = true; - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolov5lite/yolov5lite.py b/model_zoo/vision/yolov5lite/yolov5lite.py deleted file mode 100644 index 683db22f9..000000000 --- a/model_zoo/vision/yolov5lite/yolov5lite.py +++ /dev/null @@ -1,24 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -test_jpg_url = "https://raw.githubusercontent.com/ppogg/YOLOv5-Lite/master/cpp_demo/mnn/test.jpg" -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.ppogg.YOLOv5Lite("yolov5lite.onnx") - -# 如果onnx是有decode模块的,需要修改参数 -# model.is_decode_exported = True - -# 预测图片 -im = cv2.imread("test.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolov6/README.md b/model_zoo/vision/yolov6/README.md deleted file mode 100644 index accc6bdbb..000000000 --- a/model_zoo/vision/yolov6/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# YOLOv6部署示例 - -当前支持模型版本为:[YOLOv6 v0.1.0](https://github.com/meituan/YOLOv6/releases/download/0.1.0) - -本文档说明如何进行[YOLOv6](https://github.com/meituan/YOLOv6)的快速部署推理。本目录结构如下 -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── yolov6.cc # C++ 示例代码 -├── README.md # YOLOv6 部署文档 -└── yolov6.py # Python示例代码 -``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载YOLOv6模型和测试图片 -``` -python yolov6.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -11.772949,229.269287, 792.933838, 748.294189, 0.954794, 5 -667.140381,396.185455, 807.701721, 881.810120, 0.900997, 0 -223.271011,405.105743, 345.740723, 859.328552, 0.898938, 0 -50.135777,405.863129, 245.485519, 904.153809, 0.888936, 0 -0.000000,549.002869, 77.864723, 869.455017, 0.614145, 0 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOv6 API文档](./api.md) diff --git a/model_zoo/vision/yolov6/api.md b/model_zoo/vision/yolov6/api.md deleted file mode 100644 index eca89f06a..000000000 --- a/model_zoo/vision/yolov6/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOv6 API说明 - -## Python API - -### YOLOv6类 -``` -fastdeploy.vision.meituan.YOLOv6(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOv6模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolov6s.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOv6.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolov6.py](./yolov6.py) - - -## C++ API - -### YOLOv6类 -``` -fastdeploy::vision::meituan::YOLOv6( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOv6模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolov6s.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOv6::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolov6.cc](cpp/yolov6.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolov6/cpp/CMakeLists.txt b/model_zoo/vision/yolov6/cpp/CMakeLists.txt deleted file mode 100644 index 28987f7f7..000000000 --- a/model_zoo/vision/yolov6/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(yolov6_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolov6_demo ${PROJECT_SOURCE_DIR}/yolov6.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolov6_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/yolov6/cpp/README.md b/model_zoo/vision/yolov6/cpp/README.md deleted file mode 100644 index 0e2c03dbf..000000000 --- a/model_zoo/vision/yolov6/cpp/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# 编译YOLOv6示例 - -当前支持模型版本为:[YOLOv6 v0.1.0](https://github.com/meituan/YOLOv6/releases/download/0.1.0) - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 下载模型和图片 -wget https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.onnx -wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg - -# 执行 -./yolov6_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -11.772949,229.269287, 792.933838, 748.294189, 0.954794, 5 -667.140381,396.185455, 807.701721, 881.810120, 0.900997, 0 -223.271011,405.105743, 345.740723, 859.328552, 0.898938, 0 -50.135777,405.863129, 245.485519, 904.153809, 0.888936, 0 -0.000000,549.002869, 77.864723, 869.455017, 0.614145, 0 -``` diff --git a/model_zoo/vision/yolov6/cpp/yolov6.cc b/model_zoo/vision/yolov6/cpp/yolov6.cc deleted file mode 100644 index 62d2fa0be..000000000 --- a/model_zoo/vision/yolov6/cpp/yolov6.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::meituan::YOLOv6("yolov6s.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("bus.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolov6/yolov6.py b/model_zoo/vision/yolov6/yolov6.py deleted file mode 100644 index fa8aca074..000000000 --- a/model_zoo/vision/yolov6/yolov6.py +++ /dev/null @@ -1,23 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -model_url = "https://github.com/meituan/YOLOv6/releases/download/0.1.0/yolov6s.onnx" -test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg" -fd.download(model_url, ".", show_progress=True) -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.meituan.YOLOv6("yolov6s.onnx") - -# 预测图片 -im = cv2.imread("bus.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolov7/README.md b/model_zoo/vision/yolov7/README.md deleted file mode 100644 index a7165a045..000000000 --- a/model_zoo/vision/yolov7/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# 编译YOLOv7示例 - -当前支持模型版本为:[YOLOv7 v0.1](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1) - -本文档说明如何进行[YOLOv7](https://github.com/WongKinYiu/yolov7)的快速部署推理。本目录结构如下 - -``` -. -├── cpp -│   ├── CMakeLists.txt -│   ├── README.md -│   └── yolov7.cc -├── README.md -└── yolov7.py -``` - -## 获取ONNX文件 - -- 手动获取 - - 访问[YOLOv7](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1)官方github库,按照指引下载安装,下载`yolov7.pt` 模型,利用 `models/export.py` 得到`onnx`格式文件。 - - - - ``` - #下载yolov7模型文件 - wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt - - # 导出onnx格式文件 (Tips: 对应 YOLOv7 release v0.1 代码) - python models/export.py --grid --dynamic --weights PATH/TO/yolov7.pt - - # 如果您的代码版本中有支持NMS的ONNX文件导出,请使用如下命令导出ONNX文件(请暂时不要使用 "--end2end",我们后续将支持带有NMS的ONNX模型的部署) - python export.py --grid --dynamic --weights PATH/TO/yolov7.pt - - # 移动onnx文件到demo目录 - cp PATH/TO/yolov7.onnx PATH/TO/model_zoo/vision/yolov7/ - ``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` - -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` -## Python部署 - -执行如下代码即会自动下载测试图片 -``` -python yolov7.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -0.056616,191.221619, 314.871063, 409.948914, 0.955449, 17 -432.547852,211.914841, 594.904297, 346.708618, 0.942706, 17 -0.000000,185.456207, 153.967789, 286.157562, 0.860487, 17 -224.049210,195.147003, 419.658234, 364.004852, 0.798262, 17 -369.316986,209.055725, 456.373840, 321.627625, 0.687066, 17 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOv7 API文档](./api.md) diff --git a/model_zoo/vision/yolov7/api.md b/model_zoo/vision/yolov7/api.md deleted file mode 100644 index abd2abdce..000000000 --- a/model_zoo/vision/yolov7/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOv7 API说明 - -## Python API - -### YOLOv7类 -``` -fastdeploy.vision.wongkinyiu.YOLOv7(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOv7模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolov7.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOv7.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolov7.py](./yolov7.py) - - -## C++ API - -### YOLOv7类 -``` -fastdeploy::vision::wongkinyiu::YOLOv7( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOv7模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolov7.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOv7::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolov7.cc](cpp/yolov7.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolov7/cpp/CMakeLists.txt b/model_zoo/vision/yolov7/cpp/CMakeLists.txt deleted file mode 100644 index ec7c86d02..000000000 --- a/model_zoo/vision/yolov7/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(yolov7_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolov7_demo ${PROJECT_SOURCE_DIR}/yolov7.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolov7_demo ${FASTDEPLOY_LIBS}) \ No newline at end of file diff --git a/model_zoo/vision/yolov7/cpp/README.md b/model_zoo/vision/yolov7/cpp/README.md deleted file mode 100644 index 6190b3ae7..000000000 --- a/model_zoo/vision/yolov7/cpp/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# 编译YOLOv7示例 - -当前支持模型版本为:[YOLOv7 v0.1](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1) - -## 获取ONNX文件 - -- 手动获取 - - 访问[YOLOv7](https://github.com/WongKinYiu/yolov7/releases/tag/v0.1)官方github库,按照指引下载安装,下载`yolov7.pt` 模型,利用 `models/export.py` 得到`onnx`格式文件。 - - ``` - #下载yolov7模型文件 - wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt - - # 导出onnx格式文件 (Tips: 对应 YOLOv7 release v0.1 代码) - python models/export.py --grid --dynamic --weights PATH/TO/yolov7.pt - - # 如果您的代码版本中有支持NMS的ONNX文件导出,请使用如下命令导出ONNX文件(请暂时不要使用 "--end2end",我们后续将支持带有NMS的ONNX模型的部署) - python export.py --grid --dynamic --weights PATH/TO/yolov7.pt - ``` - - -## 运行demo - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 移动onnx文件到demo目录 -cp PATH/TO/yolov7.onnx PATH/TO/model_zoo/vision/yolov7/cpp/build/ - -# 下载图片 -wget https://raw.githubusercontent.com/WongKinYiu/yolov7/main/inference/images/horses.jpg - -# 执行 -./yolov7_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -0.056616,191.221619, 314.871063, 409.948914, 0.955449, 17 -432.547852,211.914841, 594.904297, 346.708618, 0.942706, 17 -0.000000,185.456207, 153.967789, 286.157562, 0.860487, 17 -224.049210,195.147003, 419.658234, 364.004852, 0.798262, 17 -369.316986,209.055725, 456.373840, 321.627625, 0.687066, 17 -``` diff --git a/model_zoo/vision/yolov7/cpp/yolov7.cc b/model_zoo/vision/yolov7/cpp/yolov7.cc deleted file mode 100644 index 8b41c0288..000000000 --- a/model_zoo/vision/yolov7/cpp/yolov7.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::wongkinyiu::YOLOv7("yolov7.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("horses.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolov7/yolov7.py b/model_zoo/vision/yolov7/yolov7.py deleted file mode 100644 index cef467622..000000000 --- a/model_zoo/vision/yolov7/yolov7.py +++ /dev/null @@ -1,21 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -test_jpg_url = "https://raw.githubusercontent.com/WongKinYiu/yolov7/main/inference/images/horses.jpg" -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.wongkinyiu.YOLOv7("yolov7.onnx") - -# 预测图片 -im = cv2.imread("horses.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) -print(model.runtime_option) diff --git a/model_zoo/vision/yolox/README.md b/model_zoo/vision/yolox/README.md deleted file mode 100644 index d64a2f0ff..000000000 --- a/model_zoo/vision/yolox/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# YOLOX部署示例 - -当前支持模型版本为:[YOLOX v0.1.1](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0) - -本文档说明如何进行[YOLOX](https://github.com/Megvii-BaseDetection/YOLOX)的快速部署推理。本目录结构如下 -``` -. -├── cpp # C++ 代码目录 -│   ├── CMakeLists.txt # C++ 代码编译CMakeLists文件 -│   ├── README.md # C++ 代码编译部署文档 -│   └── yolox.cc # C++ 示例代码 -├── README.md # YOLOX 部署文档 -└── yolox.py # Python示例代码 -``` - -## 安装FastDeploy - -使用如下命令安装FastDeploy,注意到此处安装的是`vision-cpu`,也可根据需求安装`vision-gpu` -``` -# 安装fastdeploy-python工具 -pip install fastdeploy-python - -# 安装vision-cpu模块 -fastdeploy install vision-cpu -``` - -## Python部署 - -执行如下代码即会自动下载YOLOX模型和测试图片 -``` -python yolox.py -``` - -执行完成后会将可视化结果保存在本地`vis_result.jpg`,同时输出检测结果如下 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -17.151855,225.294434, 805.329712, 735.578613, 0.940478, 5 -671.162109,387.403961, 809.000000, 879.525513, 0.909566, 0 -54.373432,400.188110, 204.652756, 893.662537, 0.894507, 0 -221.339310,406.614960, 347.045593, 857.299927, 0.887144, 0 -0.083759,554.987305, 61.894527, 881.098816, 0.450202, 0 -``` - -## 其它文档 - -- [C++部署](./cpp/README.md) -- [YOLOX API文档](./api.md) diff --git a/model_zoo/vision/yolox/api.md b/model_zoo/vision/yolox/api.md deleted file mode 100644 index c7a6f254b..000000000 --- a/model_zoo/vision/yolox/api.md +++ /dev/null @@ -1,71 +0,0 @@ -# YOLOX API说明 - -## Python API - -### YOLOX类 -``` -fastdeploy.vision.megvii.YOLOX(model_file, params_file=None, runtime_option=None, model_format=fd.Frontend.ONNX) -``` -YOLOX模型加载和初始化,当model_format为`fd.Frontend.ONNX`时,只需提供model_file,如`yolox_s.onnx`;当model_format为`fd.Frontend.PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### predict函数 -> ``` -> YOLOX.predict(image_data, conf_threshold=0.25, nms_iou_threshold=0.5) -> ``` -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **image_data**(np.ndarray): 输入数据,注意需为HWC,BGR格式 -> > * **conf_threshold**(float): 检测框置信度过滤阈值 -> > * **nms_iou_threshold**(float): NMS处理过程中iou阈值 - -示例代码参考[yolox.py](./yolox.py) - - -## C++ API - -### YOLOX类 -``` -fastdeploy::vision::megvii::YOLOX( - const string& model_file, - const string& params_file = "", - const RuntimeOption& runtime_option = RuntimeOption(), - const Frontend& model_format = Frontend::ONNX) -``` -YOLOX模型加载和初始化,当model_format为`Frontend::ONNX`时,只需提供model_file,如`yolox_s.onnx`;当model_format为`Frontend::PADDLE`时,则需同时提供model_file和params_file。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(Frontend): 模型格式 - -#### Predict函数 -> ``` -> YOLOX::Predict(cv::Mat* im, DetectionResult* result, -> float conf_threshold = 0.25, -> float nms_iou_threshold = 0.5) -> ``` -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 检测结果,包括检测框,各个框的置信度 -> > * **conf_threshold**: 检测框置信度过滤阈值 -> > * **nms_iou_threshold**: NMS处理过程中iou阈值 - -示例代码参考[cpp/yolox.cc](cpp/yolox.cc) - -## 其它API使用 - -- [模型部署RuntimeOption配置](../../../docs/api/runtime_option.md) diff --git a/model_zoo/vision/yolox/cpp/CMakeLists.txt b/model_zoo/vision/yolox/cpp/CMakeLists.txt deleted file mode 100644 index 67bf0f2da..000000000 --- a/model_zoo/vision/yolox/cpp/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -PROJECT(yolox_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.16) - -# 在低版本ABI环境中,通过如下代码进行兼容性编译 -# add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - -# 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR /fastdeploy/CustomOp/FastDeploy/build1/fastdeploy-linux-x64-gpu-0.3.0) - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(yolox_demo ${PROJECT_SOURCE_DIR}/yolox.cc) -# 添加FastDeploy库依赖 -target_link_libraries(yolox_demo ${FASTDEPLOY_LIBS}) diff --git a/model_zoo/vision/yolox/cpp/README.md b/model_zoo/vision/yolox/cpp/README.md deleted file mode 100644 index cc48878f6..000000000 --- a/model_zoo/vision/yolox/cpp/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# 编译YOLOX示例 - -当前支持模型版本为:[YOLOX v0.1.1](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0) - -``` -# 下载和解压预测库 -wget https://bj.bcebos.com/paddle2onnx/fastdeploy/fastdeploy-linux-x64-0.0.3.tgz -tar xvf fastdeploy-linux-x64-0.0.3.tgz - -# 编译示例代码 -mkdir build & cd build -cmake .. -make -j - -# 下载模型和图片 -wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx -wget https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg - -# 执行 -./yolox_demo -``` - -执行完后可视化的结果保存在本地`vis_result.jpg`,同时会将检测框输出在终端,如下所示 -``` -DetectionResult: [xmin, ymin, xmax, ymax, score, label_id] -17.151855,225.294434, 805.329712, 735.578613, 0.940478, 5 -671.162109,387.403961, 809.000000, 879.525513, 0.909566, 0 -54.373432,400.188110, 204.652756, 893.662537, 0.894507, 0 -221.339310,406.614960, 347.045593, 857.299927, 0.887144, 0 -0.083759,554.987305, 61.894527, 881.098816, 0.450202, 0 -``` diff --git a/model_zoo/vision/yolox/cpp/yolox.cc b/model_zoo/vision/yolox/cpp/yolox.cc deleted file mode 100644 index 934a50bea..000000000 --- a/model_zoo/vision/yolox/cpp/yolox.cc +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -int main() { - namespace vis = fastdeploy::vision; - auto model = vis::megvii::YOLOX("yolox_s.onnx"); - if (!model.Initialized()) { - std::cerr << "Init Failed." << std::endl; - return -1; - } - cv::Mat im = cv::imread("bus.jpg"); - cv::Mat vis_im = im.clone(); - - vis::DetectionResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Prediction Failed." << std::endl; - return -1; - } - - // 输出预测框结果 - std::cout << res.Str() << std::endl; - - // 可视化预测结果 - vis::Visualize::VisDetection(&vis_im, res); - cv::imwrite("vis_result.jpg", vis_im); - return 0; -} diff --git a/model_zoo/vision/yolox/yolox.py b/model_zoo/vision/yolox/yolox.py deleted file mode 100644 index b63675049..000000000 --- a/model_zoo/vision/yolox/yolox.py +++ /dev/null @@ -1,22 +0,0 @@ -import fastdeploy as fd -import cv2 - -# 下载模型和测试图片 -model_url = "https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.onnx" -test_jpg_url = "https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg" -fd.download(model_url, ".", show_progress=True) -fd.download(test_jpg_url, ".", show_progress=True) - -# 加载模型 -model = fd.vision.megvii.YOLOX("yolox_s.onnx") - -# 预测图片 -im = cv2.imread("bus.jpg") -result = model.predict(im, conf_threshold=0.25, nms_iou_threshold=0.5) - -# 可视化结果 -fd.vision.visualize.vis_detection(im, result) -cv2.imwrite("vis_result.jpg", im) - -# 输出预测结果 -print(result) diff --git a/sdk_mannager/fastdeploy/__init__.py b/sdk_mannager/fastdeploy/__init__.py deleted file mode 100644 index ff1cb0c72..000000000 --- a/sdk_mannager/fastdeploy/__init__.py +++ /dev/null @@ -1,230 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from six import text_type as _text_type -import logging -import argparse - -# Since the source code is not fully open sourced, -# currently we will provide the prebuilt library -# and demo codes -import os - -__version__ = "0.1.1" - - -def is_installed(package_name): - import pkg_resources - try: - pkg_resources.get_distribution(package_name) - except pkg_resources.DistributionNotFound: - return False - return True - - -def get_version(package_name): - if not is_installed(package_name): - raise Exception("{} is not installed.".format(package_name)) - major = pkg_resources.get_distribution(package).parsed_version.major - micro = pkg_resources.get_distribution(package).parsed_version.micro - minor = pkg_resources.get_distribution(package).parsed_version.minor - return major, micro, minor - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument( - '--model', - type=_text_type, - default=None, - help='Name of model, which can be listed by --list_models') - parser.add_argument( - '--platform', - type=_text_type, - default=None, - help='Define platform, supports Windows/Linux/Android/iOS.') - parser.add_argument( - '--soc', - type=_text_type, - default=None, - help='Define soc for the platform, supports x86/x86-NVIDIA_GPU/ARM/jetson.' - ) - parser.add_argument( - '--save_dir', - type=_text_type, - default=".", - help='Path to download and extract deployment SDK.') - parser.add_argument( - '--list_models', - required=False, - action="store_true", - default=False, - help='List all the supported models.') - parser.add_argument( - '--download_sdk', - required=False, - action="store_true", - default=False, - help='List all the supported models.') - - return parser.parse_args() - - -def read_sources(): - from .download import download, download_and_decompress - user_dir = os.path.expanduser('~') - print("Updating the newest sdk information...") - source_cfgs = "https://bj.bcebos.com/paddlehub/fastdeploy/fastdeploy_newest_sources.cfg.1" - if os.path.exists( - os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")): - os.remove(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) - download(source_cfgs, user_dir) - categories = dict() - res = dict() - with open(os.path.join(user_dir, "fastdeploy_newest_sources.cfg.1")) as f: - for line in f: - if line.strip().startswith("#"): - continue - if line.strip() == "": - continue - category, model, plat, soc, url = line.strip().split('\t') - if category not in categories: - categories[category] = set() - categories[category].add(model) - if model not in res: - res[model] = dict() - if plat not in res[model]: - res[model][plat] = dict() - if soc not in res[model][plat]: - res[model][plat][soc] = dict() - res[model][plat][soc] = url - return categories, res - - -def main(): - from .download import download, download_and_decompress - - args = parse_arguments() - - if not args.list_models and not args.download_sdk: - print( - "Please use flag --list_models to show all the supported models, or use flag --download_sdk to download the specify SDK to deploy you model." - ) - return - - categories, all_sources = read_sources() - all_models = list(all_sources.keys()) - all_models.sort() - - if args.list_models: - print("Currently, FastDeploy supports {} models, list as below,\n". - format(len(all_models))) - - for k, v in categories.items(): - print("\nModel Category: {}".format(k)) - print("_" * 100) - models = list(categories[k]) - models.sort() - i = 0 - while i < len(models): - if i == len(models) - 1: - print(models[i].center(30)) - i += 1 - elif i == len(models) - 2: - print(models[i].center(30), models[i + 1].center(30)) - i += 2 - else: - print(models[i].center(30), models[i + 1].center(30), - models[i + 2].center(30)) - i += 3 - return - - if not os.path.exists(args.save_dir): - print("The specified save_dir: {} is not exist.".format(args.save_dir)) - return - - if args.model is None or args.model == "": - print( - "Please define --model to choose which kind of model to deploy, use --list_models to show all the supported models." - ) - return - - if args.model not in all_sources: - print( - "{} is not supported, use --list_models to list all the models FastDeploy supported.". - format(args.model)) - return - - if args.platform is None or args.platform == "": - print( - "Please define --platform to choose which platform to deploy, supports windows/linux/android/ios." - ) - return - - if args.platform not in all_sources[args.model]: - print( - "The model:{} only supports platform of {}, {} is not supported now.". - format(args.model, - list(all_sources[args.model].keys()), args.platform)) - return - - if args.soc is None or args.soc == "": - print( - "Please define --soc to choose which hardware to deploy, for model:{} and platform:{}, the available socs are {}.". - format(args.model, args.platform, - list(all_sources[args.model][args.platform].keys()))) - return - - if args.soc not in all_sources[args.model][args.platform]: - print( - "The model:{} in platform:{} only supports soc of {}, {} is not supported now.". - format(args.model, args.platform, - list(all_sources[args.model][args.platform].keys()), - args.soc)) - return - - print("\nDownloading SDK:", - all_sources[args.model][args.platform][args.soc]) - - save_dir = args.save_dir - sdk_name = os.path.split(all_sources[args.model][args.platform][args.soc])[ - -1].strip() - if all_sources[args.model][args.platform][args.soc].count(".zip") > 0: - sdk_name = os.path.split(all_sources[args.model][args.platform][ - args.soc])[-1].strip().split(".zip")[0] - new_save_dir = os.path.join(args.save_dir, sdk_name) - if not os.path.exists(new_save_dir): - os.mkdir(new_save_dir) - save_dir = new_save_dir - download_and_decompress( - all_sources[args.model][args.platform][args.soc], - new_save_dir, - rename=sdk_name + ".zip") - os.remove(os.path.join(new_save_dir, sdk_name + ".zip")) - print("Done. All the files of SDK have been extracted in {}.".format( - new_save_dir)) - - -if __name__ == "__main__": - main() - -if is_installed('cpu_fastdeploy'): - from cpu_fastdeploy import * -elif is_installed('gpu_fastdeploy'): - from gpu_fastdeploy import * -else: - print( - "[INFO] Didn't install cpu/gpu sdk in your environment now, refer https://github.com/PaddlePaddle/FastDeploy/blob/release/0.2.0/docs/server_install.md for more details." - ) diff --git a/sdk_mannager/fastdeploy/__main__.py b/sdk_mannager/fastdeploy/__main__.py deleted file mode 100644 index 6abf5cf8f..000000000 --- a/sdk_mannager/fastdeploy/__main__.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import fastdeploy - -if __name__ == "__main__": - fastdeploy.main() diff --git a/sdk_mannager/fastdeploy/download.py b/sdk_mannager/fastdeploy/download.py deleted file mode 100644 index 3b5e92bf3..000000000 --- a/sdk_mannager/fastdeploy/download.py +++ /dev/null @@ -1,186 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import os.path as osp -import shutil -import requests -import time -import zipfile -import hashlib -import tqdm -import logging - -DOWNLOAD_RETRY_LIMIT = 3 - - -def md5check(fullname, md5sum=None): - if md5sum is None: - return True - - logging.info("File {} md5 checking...".format(fullname)) - md5 = hashlib.md5() - with open(fullname, 'rb') as f: - for chunk in iter(lambda: f.read(4096), b""): - md5.update(chunk) - calc_md5sum = md5.hexdigest() - - if calc_md5sum != md5sum: - logging.info("File {} md5 check failed, {}(calc) != " - "{}(base)".format(fullname, calc_md5sum, md5sum)) - return False - return True - - -def move_and_merge_tree(src, dst): - """ - Move src directory to dst, if dst is already exists, - merge src to dst - """ - if not osp.exists(dst): - shutil.move(src, dst) - else: - if not osp.isdir(src): - shutil.move(src, dst) - return - for fp in os.listdir(src): - src_fp = osp.join(src, fp) - dst_fp = osp.join(dst, fp) - if osp.isdir(src_fp): - if osp.isdir(dst_fp): - move_and_merge_tree(src_fp, dst_fp) - else: - shutil.move(src_fp, dst_fp) - elif osp.isfile(src_fp) and \ - not osp.isfile(dst_fp): - shutil.move(src_fp, dst_fp) - - -def download(url, path, rename=None, md5sum=None, show_progress=False): - """ - Download from url, save to path. - url (str): download url - path (str): download to given path - """ - if not osp.exists(path): - os.makedirs(path) - - fname = osp.split(url)[-1] - fullname = osp.join(path, fname) - if rename is not None: - fullname = osp.join(path, rename) - retry_cnt = 0 - while not (osp.exists(fullname) and md5check(fullname, md5sum)): - if retry_cnt < DOWNLOAD_RETRY_LIMIT: - retry_cnt += 1 - else: - logging.debug("{} download failed.".format(fname)) - raise RuntimeError("Download from {} failed. " - "Retry limit reached".format(url)) - - logging.info("Downloading {} from {}".format(fname, url)) - - req = requests.get(url, stream=True) - if req.status_code != 200: - raise RuntimeError("Downloading from {} failed with code " - "{}!".format(url, req.status_code)) - - # For protecting download interupted, download to - # tmp_fullname firstly, move tmp_fullname to fullname - # after download finished - tmp_fullname = fullname + "_tmp" - total_size = req.headers.get('content-length') - with open(tmp_fullname, 'wb') as f: - if total_size and show_progress: - for chunk in tqdm.tqdm( - req.iter_content(chunk_size=1024), - total=(int(total_size) + 1023) // 1024, - unit='KB'): - f.write(chunk) - else: - for chunk in req.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - shutil.move(tmp_fullname, fullname) - logging.debug("{} download completed.".format(fname)) - - return fullname - - -def decompress(fname): - """ - Decompress for zip and tar file - """ - logging.info("Decompressing {}...".format(fname)) - - # For protecting decompressing interupted, - # decompress to fpath_tmp directory firstly, if decompress - # successed, move decompress files to fpath and delete - # fpath_tmp and remove download compress file. - fpath = osp.split(fname)[0] - fpath_tmp = osp.join(fpath, 'tmp') - if osp.isdir(fpath_tmp): - shutil.rmtree(fpath_tmp) - os.makedirs(fpath_tmp) - - if fname.find('.tar') >= 0 or fname.find('.tgz') >= 0: - with tarfile.open(fname) as tf: - tf.extractall(path=fpath_tmp) - elif fname.find('.zip') >= 0: - with zipfile.ZipFile(fname) as zf: - zf.extractall(path=fpath_tmp) - else: - raise TypeError("Unsupport compress file type {}".format(fname)) - - for f in os.listdir(fpath_tmp): - src_dir = osp.join(fpath_tmp, f) - dst_dir = osp.join(fpath, f) - move_and_merge_tree(src_dir, dst_dir) - - shutil.rmtree(fpath_tmp) - logging.debug("{} decompressed.".format(fname)) - return dst_dir - - -def url2dir(url, path, rename=None): - full_name = download(url, path, rename, show_progress=True) - print("SDK is donwloaded, now extracting...") - if url.count(".tgz") > 0 or url.count(".tar") > 0 or url.count("zip") > 0: - return decompress(full_name) - - -def download_and_decompress(url, path='.', rename=None): - fname = osp.split(url)[-1] - fullname = osp.join(path, fname) - # if url.endswith(('tgz', 'tar.gz', 'tar', 'zip')): - # fullname = osp.join(path, fname.split('.')[0]) - nranks = 0 - if nranks <= 1: - dst_dir = url2dir(url, path, rename) - if dst_dir is not None: - fullname = dst_dir - else: - lock_path = fullname + '.lock' - if not os.path.exists(fullname): - with open(lock_path, 'w'): - os.utime(lock_path, None) - if local_rank == 0: - dst_dir = url2dir(url, path, rename) - if dst_dir is not None: - fullname = dst_dir - os.remove(lock_path) - else: - while os.path.exists(lock_path): - time.sleep(1) - return diff --git a/sdk_mannager/requirements.txt b/sdk_mannager/requirements.txt deleted file mode 100644 index 5f64a9a19..000000000 --- a/sdk_mannager/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -tqdm -six diff --git a/sdk_mannager/setup.py b/sdk_mannager/setup.py deleted file mode 100644 index 787071a1b..000000000 --- a/sdk_mannager/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -import setuptools -import fastdeploy -import io -import os - -with open("requirements.txt") as fin: - REQUIRED_PACKAGES = fin.read() - - -def read(*names, **kwargs): - with io.open( - os.path.join(os.path.dirname(__file__), *names), - encoding=kwargs.get("encoding", "utf8")) as fp: - return fp.read() - - -setuptools.setup( - name="fastdeploy-python", - version=fastdeploy.__version__, - author="FastDeploy", - author_email="fastdeploy@baidu.com", - description="FastDeploy is a toolkit to deploy deeplearning models.", - long_description=read("../README.md"), - long_description_content_type="text/markdown", - url="https://github.com/PaddlePaddle/FastDeploy", - packages=setuptools.find_packages(), - install_requires=REQUIRED_PACKAGES, - classifiers=[ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - ], - license='Apache 2.0', - entry_points={ - 'console_scripts': ['fastdeploy=fastdeploy.__init__:main', ] - })