From e78faf951608174d7dd77a25a304b1426a75303c Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Thu, 8 Dec 2022 07:18:54 +0000
Subject: [PATCH 01/30] Fix links in readme

---
 tools/common_tools/auto_compression/README.md | 12 ++++++------
 tools/common_tools/auto_compression/README_EN.md | 8 ++++----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/common_tools/auto_compression/README.md b/tools/common_tools/auto_compression/README.md
index d4b2c2546..a990eeaa3 100644
--- a/tools/common_tools/auto_compression/README.md
+++ b/tools/common_tools/auto_compression/README.md
@@ -112,14 +112,14 @@ FastDeploy目前为用户提供了多个模型的压缩[config](./configs/)文
 ## 4. FastDeploy 部署量化模型
 用户在获得量化模型之后,即可以使用FastDeploy进行部署, 部署文档请参考: 具体请用户参考示例文档:
-- [YOLOv5 量化模型部署](../../examples/vision/detection/yolov5/quantize/)
+- [YOLOv5 量化模型部署](../../../examples/vision/detection/yolov5/quantize/)
-- [YOLOv6 量化模型部署](../../examples/vision/detection/yolov6/quantize/)
+- [YOLOv6 量化模型部署](../../../examples/vision/detection/yolov6/quantize/)
-- [YOLOv7 量化模型部署](../../examples/vision/detection/yolov7/quantize/)
+- [YOLOv7 量化模型部署](../../../examples/vision/detection/yolov7/quantize/)
-- [PadddleClas 量化模型部署](../../examples/vision/classification/paddleclas/quantize/)
+- [PaddleClas 量化模型部署](../../../examples/vision/classification/paddleclas/quantize/)
-- [PadddleDetection 量化模型部署](../../examples/vision/detection/paddledetection/quantize/)
+- [PaddleDetection 量化模型部署](../../../examples/vision/detection/paddledetection/quantize/)
-- [PadddleSegmentation 量化模型部署](../../examples/vision/segmentation/paddleseg/quantize/)
+- [PaddleSegmentation 量化模型部署](../../../examples/vision/segmentation/paddleseg/quantize/)

diff --git a/tools/common_tools/auto_compression/README_EN.md b/tools/common_tools/auto_compression/README_EN.md
index f68022c13..bf6133faa 100644
--- a/tools/common_tools/auto_compression/README_EN.md
+++ b/tools/common_tools/auto_compression/README_EN.md
@@ -121,10 +121,10 @@ FastDeploy currently provides users with compression [config](./configs/) files
 Once obtained the quantized model, developers can deploy it on FastDeploy. Please refer to the following docs for more details
-- [YOLOv5 Quantized Model Deployment](../../examples/vision/detection/yolov5/quantize/)
+- [YOLOv5 Quantized Model Deployment](../../../examples/vision/detection/yolov5/quantize/)
-- [YOLOv6 Quantized Model Deployment](../../examples/vision/detection/yolov6/quantize/)
+- [YOLOv6 Quantized Model Deployment](../../../examples/vision/detection/yolov6/quantize/)
-- [YOLOv7 Quantized Model Deployment](../../examples/vision/detection/yolov7/quantize/)
+- [YOLOv7 Quantized Model Deployment](../../../examples/vision/detection/yolov7/quantize/)
-- [PadddleClas Quantized Model Deployment](../../examples/vision/classification/paddleclas/quantize/)
+- [PaddleClas Quantized Model Deployment](../../../examples/vision/classification/paddleclas/quantize/)

From 518fd782100ae81fffeff6ee2166e0ee71014884 Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Thu, 8 Dec 2022 07:22:41 +0000
Subject: [PATCH 02/30] Fix links in readme

---
 tools/common_tools/auto_compression/README_EN.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/common_tools/auto_compression/README_EN.md b/tools/common_tools/auto_compression/README_EN.md
index bf6133faa..9c591208d 100644
--- a/tools/common_tools/auto_compression/README_EN.md
+++ b/tools/common_tools/auto_compression/README_EN.md
@@ -128,3 +128,7 @@ Once obtained the quantized model, developers can deploy it on FastDeploy. Pleas
 - [YOLOv7 Quantized Model Deployment](../../../examples/vision/detection/yolov7/quantize/)
 - [PaddleClas Quantized Model Deployment](../../../examples/vision/classification/paddleclas/quantize/)
+
+- [PaddleDetection Quantized Model Deployment](../../../examples/vision/detection/paddledetection/quantize/)
+
+- [PaddleSegmentation Quantized Model Deployment](../../../examples/vision/segmentation/paddleseg/quantize/)

From 1b590259a40251be63dee0104210851fd554972f Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Thu, 8 Dec 2022 13:32:07 +0000
Subject: [PATCH 03/30] Update PPOCRv2/v3 examples

---
 examples/vision/ocr/PP-OCRv2/cpp/infer.cc | 17 +++++++++++++---
 examples/vision/ocr/PP-OCRv2/python/infer.py | 21 ++++++++++++++++----
 examples/vision/ocr/PP-OCRv3/cpp/infer.cc | 18 ++++++++++++++---
 examples/vision/ocr/PP-OCRv3/python/infer.py | 21 ++++++++++++++++----
 4 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
index 7bac320d5..6cde6390f 100644
--- a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
+++ b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
@@ -33,13 +33,18 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   auto cls_option = option;
   auto rec_option = option;
+  // The cls and rec models can now run inference on a batch of images.
+  // Users can initialize the inference batch sizes here and set them after creating the PPOCR pipeline.
+  int cls_batch_size = 1;
+  int rec_batch_size = 6;
+
   // If use TRT backend, the dynamic shape will be set as follow.
   // We recommend that users set the length and height of the detection model to a multiple of 32.
   det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640}, {1, 3, 960, 960});
-  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {32, 3, 48, 1024});
-  rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {10, 3, 32, 320},
-                              {32, 3, 32, 2304});
+  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+  rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {rec_batch_size, 3, 32, 320},
+                              {rec_batch_size, 3, 32, 2304});
   // Users could save TRT cache file to disk as follow.
   // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -58,6 +63,12 @@
   // auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &rec_model);
   auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model);
+  // Set the inference batch size for the cls and rec models; the value can be -1 or any positive integer.
+  // When the inference batch size is set to -1, the inference batch size
+  // of the cls and rec models will be the same as the number of boxes detected by the det model.
+  ppocr_v2.SetClsBatchSize(cls_batch_size);
+  ppocr_v2.SetRecBatchSize(rec_batch_size);
+
   if(!ppocr_v2.Initialized()){
     std::cerr << "Failed to initialize PP-OCR."
              << std::endl;
    return;

diff --git a/examples/vision/ocr/PP-OCRv2/python/infer.py b/examples/vision/ocr/PP-OCRv2/python/infer.py
index af915143a..1487d795f 100644
--- a/examples/vision/ocr/PP-OCRv2/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv2/python/infer.py
@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # 用户也可根据自行需求分别配置
 runtime_option = build_option(args)
+# PPOCR的cls和rec模型现在已经支持推理一个Batch的数据
+# 定义下面两个变量后, 可用于设置trt输入shape, 并在PPOCR模型初始化后, 完成Batch推理设置
+cls_batch_size = 1
+rec_batch_size = 6
+
 # 当使用TRT时,分别给三个模型的runtime设置动态shape,并完成模型的创建.
 # 注意: 需要在检测模型创建完成后,再设置分类模型的动态输入并创建分类模型, 识别模型同理.
 # 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [32, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # 用户可以把TRT引擎文件保存至本地
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 32, 10], [10, 3, 32, 320],
-                               [32, 3, 32, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 32, 10],
+                               [rec_batch_size, 3, 32, 320],
+                               [rec_batch_size, 3, 32, 2304])
 # 用户可以把TRT引擎文件保存至本地
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 ppocr_v2 = fd.vision.ocr.PPOCRv2(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+# 给cls和rec模型设置推理时的batch size
+# 此值能为-1, 和1到正无穷
+# 当此值为-1时, cls和rec模型的batch size将默认和det模型检测出的框的数量相同
+ppocr_v2.cls_batch_size = cls_batch_size
+ppocr_v2.rec_batch_size = rec_batch_size
+
 # 预测图片准备
 im = cv2.imread(args.image)

diff --git a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
index 911b311e3..90b77679f 100644
--- a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
+++ b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
@@ -33,13 +33,19 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   auto cls_option = option;
   auto rec_option = option;
+  // The cls and rec models can now run inference on a batch of images.
+  // Users can initialize the inference batch sizes here and set them after creating the PPOCR pipeline.
+  int cls_batch_size = 1;
+  int rec_batch_size = 6;
+
   // If use TRT backend, the dynamic shape will be set as follow.
   // We recommend that users set the length and height of the detection model to a multiple of 32.
+  // We also recommend that users set the TRT input shape as follows.
   det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640}, {1, 3, 960, 960});
-  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {64, 3, 48, 1024});
-  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320},
-                              {64, 3, 48, 2304});
+  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
+                              {rec_batch_size, 3, 48, 2304});
   // Users could save TRT cache file to disk as follow.
  // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -57,6 +63,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   // The classification model is optional, so the PP-OCR can also be connected in series as follows
   // auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
   auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
+
+  // Set the inference batch size for the cls and rec models; the value can be -1 or any positive integer.
+  // When the inference batch size is set to -1, the inference batch size
+  // of the cls and rec models will be the same as the number of boxes detected by the det model.
+  ppocr_v3.SetClsBatchSize(cls_batch_size);
+  ppocr_v3.SetRecBatchSize(rec_batch_size);
   if(!ppocr_v3.Initialized()){
     std::cerr << "Failed to initialize PP-OCR." << std::endl;

diff --git a/examples/vision/ocr/PP-OCRv3/python/infer.py b/examples/vision/ocr/PP-OCRv3/python/infer.py
index b6b27b240..1ec962cb5 100644
--- a/examples/vision/ocr/PP-OCRv3/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv3/python/infer.py
@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # 用户也可根据自行需求分别配置
 runtime_option = build_option(args)
+# PPOCR的cls和rec模型现在已经支持推理一个Batch的数据
+# 定义下面两个变量后, 可用于设置trt输入shape, 并在PPOCR模型初始化后, 完成Batch推理设置
+cls_batch_size = 1
+rec_batch_size = 6
+
 # 当使用TRT时,分别给三个模型的runtime设置动态shape,并完成模型的创建.
 # 注意: 需要在检测模型创建完成后,再设置分类模型的动态输入并创建分类模型, 识别模型同理.
 # 如果用户想要自己改动检测模型的输入shape, 我们建议用户把检测模型的长和高设置为32的倍数.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # 用户可以把TRT引擎文件保存至本地
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [rec_batch_size, 3, 48, 320],
+                               [rec_batch_size, 3, 48, 2304])
 # 用户可以把TRT引擎文件保存至本地
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 ppocr_v3 = fd.vision.ocr.PPOCRv3(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+# 给cls和rec模型设置推理时的batch size
+# 此值能为-1, 和1到正无穷
+# 当此值为-1时, cls和rec模型的batch size将默认和det模型检测出的框的数量相同
+ppocr_v3.cls_batch_size = cls_batch_size
+ppocr_v3.rec_batch_size = rec_batch_size
+
 # 预测图片准备
 im = cv2.imread(args.image)

From e9869267ddb62e2a07ff9bd2fe30f5f28df542bc Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Fri, 9 Dec 2022 10:29:28 +0000
Subject: [PATCH 04/30] Update auto compression configs

---
 tools/README.md | 7 +++----
 tools/README_EN.md | 7 +++----
 tools/common_tools/auto_compression/README.md | 7 +++----
 tools/common_tools/auto_compression/README_EN.md | 9 +++------
 tools/common_tools/auto_compression/configs/README.md | 2 +-
 tools/common_tools/auto_compression/configs/README_EN.md | 2 +-
 .../configs/classification/mobilenetv1_ssld_quant.yaml | 2 +-
 .../configs/classification/resnet50_vd_quant.yaml | 2 +-
 .../configs/detection/ppyoloe_plus_withNMS_quant.yaml | 2 +-
 .../configs/detection/ppyoloe_withNMS_quant.yaml | 2 +-
.../configs/detection/yolov5s_quant.yaml | 2 +- .../configs/detection/yolov6s_quant.yaml | 3 ++- .../auto_compression/configs/detection/yolov7_quant.yaml | 2 +- .../configs/segmentation/pp_liteseg_quant.yaml | 2 +- 14 files changed, 23 insertions(+), 28 deletions(-) diff --git a/tools/README.md b/tools/README.md index 7a0e5b668..07c7ac8a6 100755 --- a/tools/README.md +++ b/tools/README.md @@ -11,15 +11,14 @@ FastDeploy提供了一系列高效易用的工具优化部署体验, 提升推 FastDeploy基于PaddleSlim的Auto Compression Toolkit(ACT), 给用户提供了一键模型自动化压缩的工具, 用户可以轻松地通过一行命令对模型进行自动化压缩, 并在FastDeploy上部署压缩后的模型, 提升推理速度. 本文档将以FastDeploy一键模型自动化压缩工具为例, 介绍如何安装此工具, 并提供相应的使用文档. ### 环境准备 -1.用户参考PaddlePaddle官网, 安装develop版本 +1.用户参考PaddlePaddle官网, 安装Paddle 2.4 版本 ``` https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html ``` -2.安装PaddleSlim develop版本 +2.安装PaddleSlim 2.4 版本 ```bash -git clone https://github.com/PaddlePaddle/PaddleSlim.git & cd PaddleSlim -python setup.py install +pip install paddleslim==2.4.0 ``` 3.安装fastdeploy-tools工具包 diff --git a/tools/README_EN.md b/tools/README_EN.md index 5c1e2501f..422b477d4 100755 --- a/tools/README_EN.md +++ b/tools/README_EN.md @@ -11,15 +11,14 @@ FastDeploy provides a series of efficient and easy-to-use tools to optimize the Based on PaddleSlim's Auto Compression Toolkit (ACT), FastDeploy provides users with a one-click model automation compression tool that allows users to easily compress the model with a single command. This document will take FastDeploy's one-click model automation compression tool as an example, introduce how to install the tool, and provide the corresponding documentation for usage. ### Environmental Preparation -1.Install PaddlePaddle develop version +1.Install PaddlePaddle 2.4 version ``` https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html ``` -2.Install PaddleSlim dev version +2.Install PaddleSlim 2.4 version ```bash -git clone https://github.com/PaddlePaddle/PaddleSlim.git & cd PaddleSlim -python setup.py install +pip install paddleslim==2.4.0 ``` 3.Install fastdeploy-tools package diff --git a/tools/common_tools/auto_compression/README.md b/tools/common_tools/auto_compression/README.md index a990eeaa3..7c1b3feac 100644 --- a/tools/common_tools/auto_compression/README.md +++ b/tools/common_tools/auto_compression/README.md @@ -6,15 +6,14 @@ FastDeploy基于PaddleSlim的Auto Compression Toolkit(ACT), 给用户提供了 ### 环境依赖 -1.用户参考PaddlePaddle官网, 安装develop版本 +1.用户参考PaddlePaddle官网, 安装Paddle 2.4 版本 ``` https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html ``` -2.安装paddleslim-develop版本 +2.安装PaddleSlim 2.4 版本 ```bash -git clone https://github.com/PaddlePaddle/PaddleSlim.git & cd PaddleSlim -python setup.py install +pip install paddleslim==2.4.0 ``` ### 一键模型自动化压缩工具安装方式 diff --git a/tools/common_tools/auto_compression/README_EN.md b/tools/common_tools/auto_compression/README_EN.md index 9c591208d..fb1480819 100644 --- a/tools/common_tools/auto_compression/README_EN.md +++ b/tools/common_tools/auto_compression/README_EN.md @@ -7,17 +7,14 @@ We take the Yolov5 series as an example to demonstrate how to install and execut ### Environment Dependencies -1. Install the develop version downloaded from PaddlePaddle official website. 
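Both the Chinese and English guides in this patch pin Paddle 2.4 and PaddleSlim 2.4.0, so a quick post-install sanity check is useful. A tiny Python sketch, assuming only that both packages expose `__version__`, as their pip releases do:

```python
# Hypothetical sanity check for the toolchain pinned by this patch series:
# PaddlePaddle 2.4.x and PaddleSlim 2.4.0.
import paddle
import paddleslim

assert paddle.__version__.startswith("2.4"), paddle.__version__
assert paddleslim.__version__.startswith("2.4"), paddleslim.__version__
print("paddle", paddle.__version__, "| paddleslim", paddleslim.__version__)
```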
- +1.Install PaddlePaddle 2.4 version ``` https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/develop/install/pip/linux-pip.html ``` -2.Install PaddleSlim-develop - +2.Install PaddleSlim 2.4 version ```bash -git clone https://github.com/PaddlePaddle/PaddleSlim.git & cd PaddleSlim -python setup.py install +pip install paddleslim==2.4.0 ``` ### Install Fastdeploy Auto Compression Toolkit diff --git a/tools/common_tools/auto_compression/configs/README.md b/tools/common_tools/auto_compression/configs/README.md index 992402656..3c71ad41d 100644 --- a/tools/common_tools/auto_compression/configs/README.md +++ b/tools/common_tools/auto_compression/configs/README.md @@ -24,7 +24,7 @@ Distillation: alpha: 1.0 #蒸馏loss所占权重 loss: soft_label #蒸馏loss算法 -Quantization: +QuantAware: onnx_format: true #是否采用ONNX量化标准格式, 要在FastDeploy上部署, 必须选true use_pact: true #量化训练是否使用PACT方法 activation_quantize_type: 'moving_average_abs_max' #激活量化方式 diff --git a/tools/common_tools/auto_compression/configs/README_EN.md b/tools/common_tools/auto_compression/configs/README_EN.md index c1066409c..8381105eb 100644 --- a/tools/common_tools/auto_compression/configs/README_EN.md +++ b/tools/common_tools/auto_compression/configs/README_EN.md @@ -26,7 +26,7 @@ Distillation: alpha: 1.0 #Distillation loss weight loss: soft_label #Distillation loss algorithm -Quantization: +QuantAware: onnx_format: true #Whether to use ONNX quantization standard format or not, must be true to deploy on FastDeploy use_pact: true #Whether to use the PACT method for training activation_quantize_type: 'moving_average_abs_max' #Activations quantization methods diff --git a/tools/common_tools/auto_compression/configs/classification/mobilenetv1_ssld_quant.yaml b/tools/common_tools/auto_compression/configs/classification/mobilenetv1_ssld_quant.yaml index 7fa979110..22fe14c63 100644 --- a/tools/common_tools/auto_compression/configs/classification/mobilenetv1_ssld_quant.yaml +++ b/tools/common_tools/auto_compression/configs/classification/mobilenetv1_ssld_quant.yaml @@ -17,7 +17,7 @@ Distillation: - softmax_0.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/tools/common_tools/auto_compression/configs/classification/resnet50_vd_quant.yaml b/tools/common_tools/auto_compression/configs/classification/resnet50_vd_quant.yaml index 6de409ac7..4197bf3d5 100644 --- a/tools/common_tools/auto_compression/configs/classification/resnet50_vd_quant.yaml +++ b/tools/common_tools/auto_compression/configs/classification/resnet50_vd_quant.yaml @@ -16,7 +16,7 @@ Distillation: node: - softmax_0.tmp_0 -Quantization: +QuantAware: use_pact: true activation_bits: 8 is_full_quantize: false diff --git a/tools/common_tools/auto_compression/configs/detection/ppyoloe_plus_withNMS_quant.yaml b/tools/common_tools/auto_compression/configs/detection/ppyoloe_plus_withNMS_quant.yaml index 2b9f8d7a4..8c4f98257 100644 --- a/tools/common_tools/auto_compression/configs/detection/ppyoloe_plus_withNMS_quant.yaml +++ b/tools/common_tools/auto_compression/configs/detection/ppyoloe_plus_withNMS_quant.yaml @@ -14,7 +14,7 @@ Distillation: alpha: 1.0 loss: soft_label -Quantization: +QuantAware: onnx_format: true use_pact: true activation_quantize_type: 'moving_average_abs_max' diff --git a/tools/common_tools/auto_compression/configs/detection/ppyoloe_withNMS_quant.yaml b/tools/common_tools/auto_compression/configs/detection/ppyoloe_withNMS_quant.yaml index e46c11b27..2d1a2536b 100644 --- 
a/tools/common_tools/auto_compression/configs/detection/ppyoloe_withNMS_quant.yaml
+++ b/tools/common_tools/auto_compression/configs/detection/ppyoloe_withNMS_quant.yaml
@@ -14,7 +14,7 @@ Distillation:
   alpha: 1.0
   loss: soft_label

-Quantization:
+QuantAware:
   onnx_format: true
   use_pact: true
   activation_quantize_type: 'moving_average_abs_max'

diff --git a/tools/common_tools/auto_compression/configs/detection/yolov5s_quant.yaml b/tools/common_tools/auto_compression/configs/detection/yolov5s_quant.yaml
index 72cfef9ba..9d6b507cf 100644
--- a/tools/common_tools/auto_compression/configs/detection/yolov5s_quant.yaml
+++ b/tools/common_tools/auto_compression/configs/detection/yolov5s_quant.yaml
@@ -14,7 +14,7 @@ Distillation:
   alpha: 1.0
   loss: soft_label

-Quantization:
+QuantAware:
   onnx_format: true
   use_pact: true
   activation_quantize_type: 'moving_average_abs_max'

diff --git a/tools/common_tools/auto_compression/configs/detection/yolov6s_quant.yaml b/tools/common_tools/auto_compression/configs/detection/yolov6s_quant.yaml
index ee4986312..44b8bd472 100644
--- a/tools/common_tools/auto_compression/configs/detection/yolov6s_quant.yaml
+++ b/tools/common_tools/auto_compression/configs/detection/yolov6s_quant.yaml
@@ -14,12 +14,13 @@ Distillation:
   alpha: 1.0
   loss: soft_label

-Quantization:
+QuantAware:
   onnx_format: true
   activation_quantize_type: 'moving_average_abs_max'
   quantize_op_types:
   - conv2d
   - depthwise_conv2d
+  - conv2d_transpose

 PTQ:

diff --git a/tools/common_tools/auto_compression/configs/detection/yolov7_quant.yaml b/tools/common_tools/auto_compression/configs/detection/yolov7_quant.yaml
index e6cc2c9a9..5e056e94c 100644
--- a/tools/common_tools/auto_compression/configs/detection/yolov7_quant.yaml
+++ b/tools/common_tools/auto_compression/configs/detection/yolov7_quant.yaml
@@ -14,7 +14,7 @@ Distillation:
   alpha: 1.0
   loss: soft_label

-Quantization:
+QuantAware:
   onnx_format: true
   activation_quantize_type: 'moving_average_abs_max'
   quantize_op_types:

diff --git a/tools/common_tools/auto_compression/configs/segmentation/pp_liteseg_quant.yaml b/tools/common_tools/auto_compression/configs/segmentation/pp_liteseg_quant.yaml
index 9c04f65d3..beddb370d 100644
--- a/tools/common_tools/auto_compression/configs/segmentation/pp_liteseg_quant.yaml
+++ b/tools/common_tools/auto_compression/configs/segmentation/pp_liteseg_quant.yaml
@@ -17,7 +17,7 @@ Distillation:
   node:
   - conv2d_94.tmp_0

-Quantization:
+QuantAware:
   onnx_format: True
   quantize_op_types:
   - conv2d

From ad840082634e9ed3ced23c3da331e6b7221eb460 Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Mon, 12 Dec 2022 13:33:19 +0000
Subject: [PATCH 05/30] Add new quantization support for paddleclas model

---
 tools/common_tools/auto_compression/README.md | 5 ++
 .../auto_compression/README_EN.md | 4 ++
 .../classification/efficientnetb0_quant.yaml | 50 +++++++++++++++++++
 .../mobilenetv3_large_x1_0_quant.yaml | 46 +++++++++++++++++
 .../classification/pphgnet_tiny_quant.yaml | 50 +++++++++++++++++++
 .../classification/pplcnetv2_base_quant.yaml | 50 +++++++++++++++++++
 6 files changed, 205 insertions(+)
 create mode 100644 tools/common_tools/auto_compression/configs/classification/efficientnetb0_quant.yaml
 create mode 100644 tools/common_tools/auto_compression/configs/classification/mobilenetv3_large_x1_0_quant.yaml
 create mode 100644 tools/common_tools/auto_compression/configs/classification/pphgnet_tiny_quant.yaml
 create mode 100644 tools/common_tools/auto_compression/configs/classification/pplcnetv2_base_quant.yaml

diff --git 
a/tools/common_tools/auto_compression/README.md b/tools/common_tools/auto_compression/README.md
index 7c1b3feac..ed0717a42 100644
--- a/tools/common_tools/auto_compression/README.md
+++ b/tools/common_tools/auto_compression/README.md
@@ -99,6 +99,10 @@ FastDeploy目前为用户提供了多个模型的压缩[config](./configs/)文
 | -------------------- | ------------------------------------------------------------ |----------------------------------------- |
 | [mobilenetv1_ssld_quant](./configs/classification/mobilenetv1_ssld_quant.yaml) | [mobilenetv1_ssld](https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV1_ssld_infer.tgz) | |
 | [resnet50_vd_quant](./configs/classification/resnet50_vd_quant.yaml) | [resnet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz) | |
+| [efficientnetb0_quant](./configs/classification/efficientnetb0_quant.yaml) | [efficientnetb0](https://bj.bcebos.com/paddlehub/fastdeploy/EfficientNetB0_small_infer.tgz) | |
+| [mobilenetv3_large_x1_0_quant](./configs/classification/mobilenetv3_large_x1_0_quant.yaml) | [mobilenetv3_large_x1_0](https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV3_large_x1_0_ssld_infer.tgz) | |
+| [pphgnet_tiny_quant](./configs/classification/pphgnet_tiny_quant.yaml) | [pphgnet_tiny](https://bj.bcebos.com/paddlehub/fastdeploy/PPHGNet_tiny_ssld_infer.tgz) | |
+| [pplcnetv2_base_quant](./configs/classification/pplcnetv2_base_quant.yaml) | [pplcnetv2_base](https://bj.bcebos.com/paddlehub/fastdeploy/PPLCNetV2_base_infer.tgz) | |
 | [yolov5s_quant](./configs/detection/yolov5s_quant.yaml) | [yolov5s](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) | |
 | [yolov6s_quant](./configs/detection/yolov6s_quant.yaml) | [yolov6s](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) | |
 | [yolov7_quant](./configs/detection/yolov7_quant.yaml) | [yolov7](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) | |

diff --git a/tools/common_tools/auto_compression/README_EN.md b/tools/common_tools/auto_compression/README_EN.md
index fb1480819..c3b0cfee6 100644
--- a/tools/common_tools/auto_compression/README_EN.md
+++ b/tools/common_tools/auto_compression/README_EN.md
@@ -107,6 +107,10 @@ FastDeploy currently provides users with compression [config](./configs/) files
 | -------------------- | ------------------------------------------------------------ |----------------------------------------- |
 | [mobilenetv1_ssld_quant](./configs/classification/mobilenetv1_ssld_quant.yaml) | [mobilenetv1_ssld](https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV1_ssld_infer.tgz) | |
 | [resnet50_vd_quant](./configs/classification/resnet50_vd_quant.yaml) | [resnet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz) | |
+| [efficientnetb0_quant](./configs/classification/efficientnetb0_quant.yaml) | [efficientnetb0](https://bj.bcebos.com/paddlehub/fastdeploy/EfficientNetB0_small_infer.tgz) | |
+| [mobilenetv3_large_x1_0_quant](./configs/classification/mobilenetv3_large_x1_0_quant.yaml) | [mobilenetv3_large_x1_0](https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV3_large_x1_0_ssld_infer.tgz) | |
+| [pphgnet_tiny_quant](./configs/classification/pphgnet_tiny_quant.yaml) | [pphgnet_tiny](https://bj.bcebos.com/paddlehub/fastdeploy/PPHGNet_tiny_ssld_infer.tgz) | |
+| [pplcnetv2_base_quant](./configs/classification/pplcnetv2_base_quant.yaml) | [pplcnetv2_base](https://bj.bcebos.com/paddlehub/fastdeploy/PPLCNetV2_base_infer.tgz) | |
 | [yolov5s_quant](./configs/detection/yolov5s_quant.yaml) | [yolov5s](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) 
| | | [yolov6s_quant](./configs/detection/yolov6s_quant.yaml) | [yolov6s](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) | | | [yolov7_quant](./configs/detection/yolov7_quant.yaml) | [yolov7](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) | | diff --git a/tools/common_tools/auto_compression/configs/classification/efficientnetb0_quant.yaml b/tools/common_tools/auto_compression/configs/classification/efficientnetb0_quant.yaml new file mode 100644 index 000000000..4a502445e --- /dev/null +++ b/tools/common_tools/auto_compression/configs/classification/efficientnetb0_quant.yaml @@ -0,0 +1,50 @@ +Global: + model_dir: ./EfficientNetB0_small_infer/ + format: 'paddle' + model_filename: inference.pdmodel + params_filename: inference.pdiparams + qat_image_path: ./ImageNet_val_640 + ptq_image_path: ./ImageNet_val_640 + input_list: ['inputs'] + qat_preprocess: cls_image_preprocess + ptq_preprocess: cls_image_preprocess + qat_batch_size: 32 + +Distillation: + alpha: 1.0 + loss: l2 + node: + - softmax_0.tmp_0 + +QuantAware: + use_pact: true + activation_bits: 8 + is_full_quantize: false + onnx_format: True + activation_quantize_type: moving_average_abs_max + weight_quantize_type: channel_wise_abs_max + not_quant_pattern: + - skip_quant + quantize_op_types: + - conv2d + - depthwise_conv2d + - matmul + - matmul_v2 + weight_bits: 8 + +TrainConfig: + epochs: 1 + eval_iter: 500 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.015 + T_max: 8000 + optimizer_builder: + optimizer: + type: Momentum + weight_decay: 0.00002 + origin_metric: 0.7738 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None diff --git a/tools/common_tools/auto_compression/configs/classification/mobilenetv3_large_x1_0_quant.yaml b/tools/common_tools/auto_compression/configs/classification/mobilenetv3_large_x1_0_quant.yaml new file mode 100644 index 000000000..3ee6c8d3b --- /dev/null +++ b/tools/common_tools/auto_compression/configs/classification/mobilenetv3_large_x1_0_quant.yaml @@ -0,0 +1,46 @@ +Global: + model_dir: ./MobileNetV3_large_x1_0_ssld_infer/ + format: 'paddle' + model_filename: inference.pdmodel + params_filename: inference.pdiparams + qat_image_path: ./ImageNet_val_640 + ptq_image_path: ./ImageNet_val_640 + input_list: ['inputs'] + qat_preprocess: cls_image_preprocess + ptq_preprocess: cls_image_preprocess + qat_batch_size: 128 + + +Distillation: + alpha: 1.0 + loss: soft_label + +QuantAware: + use_pact: true + activation_bits: 8 + is_full_quantize: false + onnx_format: True + activation_quantize_type: moving_average_abs_max + weight_quantize_type: channel_wise_abs_max + not_quant_pattern: + - skip_quant + quantize_op_types: + - conv2d + - depthwise_conv2d + - matmul + - matmul_v2 + weight_bits: 8 + +TrainConfig: + epochs: 2 + eval_iter: 5000 + learning_rate: 0.001 + optimizer_builder: + optimizer: + type: Momentum + weight_decay: 0.00002 + origin_metric: 0.7896 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None diff --git a/tools/common_tools/auto_compression/configs/classification/pphgnet_tiny_quant.yaml b/tools/common_tools/auto_compression/configs/classification/pphgnet_tiny_quant.yaml new file mode 100644 index 000000000..b282d6dde --- /dev/null +++ b/tools/common_tools/auto_compression/configs/classification/pphgnet_tiny_quant.yaml @@ -0,0 +1,50 @@ +Global: + model_dir: ./PPHGNet_tiny_ssld_infer/ + format: 'paddle' + model_filename: inference.pdmodel + params_filename: inference.pdiparams + 
qat_image_path: ./ImageNet_val_640 + ptq_image_path: ./ImageNet_val_640 + input_list: ['x'] + qat_preprocess: cls_image_preprocess + ptq_preprocess: cls_image_preprocess + qat_batch_size: 32 + +Distillation: + alpha: 1.0 + loss: l2 + node: + - softmax_1.tmp_0 + +QuantAware: + use_pact: true + activation_bits: 8 + is_full_quantize: false + onnx_format: True + activation_quantize_type: moving_average_abs_max + weight_quantize_type: channel_wise_abs_max + not_quant_pattern: + - skip_quant + quantize_op_types: + - conv2d + - depthwise_conv2d + - matmul + - matmul_v2 + weight_bits: 8 + +TrainConfig: + epochs: 1 + eval_iter: 500 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.015 + T_max: 8000 + optimizer_builder: + optimizer: + type: Momentum + weight_decay: 0.00002 + origin_metric: 0.7959 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None diff --git a/tools/common_tools/auto_compression/configs/classification/pplcnetv2_base_quant.yaml b/tools/common_tools/auto_compression/configs/classification/pplcnetv2_base_quant.yaml new file mode 100644 index 000000000..7c81c3ce2 --- /dev/null +++ b/tools/common_tools/auto_compression/configs/classification/pplcnetv2_base_quant.yaml @@ -0,0 +1,50 @@ +Global: + model_dir: ./PPLCNetV2_base_infer/ + format: 'paddle' + model_filename: inference.pdmodel + params_filename: inference.pdiparams + qat_image_path: ./ImageNet_val_640 + ptq_image_path: ./ImageNet_val_640 + input_list: ['x'] + qat_preprocess: cls_image_preprocess + ptq_preprocess: cls_image_preprocess + qat_batch_size: 32 + +Distillation: + alpha: 1.0 + loss: l2 + node: + - softmax_1.tmp_0 + +QuantAware: + use_pact: true + activation_bits: 8 + is_full_quantize: false + onnx_format: True + activation_quantize_type: moving_average_abs_max + weight_quantize_type: channel_wise_abs_max + not_quant_pattern: + - skip_quant + quantize_op_types: + - conv2d + - depthwise_conv2d + - matmul + - matmul_v2 + weight_bits: 8 + +TrainConfig: + epochs: 1 + eval_iter: 500 + learning_rate: + type: CosineAnnealingDecay + learning_rate: 0.015 + T_max: 8000 + optimizer_builder: + optimizer: + type: Momentum + weight_decay: 0.00002 + origin_metric: 0.7704 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None From fd0a3be0b993dc7ab739f4745f6f804a413917e4 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Mon, 12 Dec 2022 13:44:14 +0000 Subject: [PATCH 06/30] Update quantized Yolov6s model download link --- examples/vision/detection/yolov6/quantize/cpp/README.md | 2 +- examples/vision/detection/yolov6/quantize/python/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/vision/detection/yolov6/quantize/cpp/README.md b/examples/vision/detection/yolov6/quantize/cpp/README.md index 7ad762100..a929f8ced 100755 --- a/examples/vision/detection/yolov6/quantize/cpp/README.md +++ b/examples/vision/detection/yolov6/quantize/cpp/README.md @@ -23,7 +23,7 @@ cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j

 #下载FastDeloy提供的yolov6s量化模型文件和测试图片
-wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_qat_model.tar
-tar -xvf yolov6s_qat_model.tar
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_qat_model_new.tar
+tar -xvf yolov6s_qat_model_new.tar
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

diff --git a/examples/vision/detection/yolov6/quantize/python/README.md b/examples/vision/detection/yolov6/quantize/python/README.md
index 057e13f9a..e9b80bb6f 100755
--- a/examples/vision/detection/yolov6/quantize/python/README.md
+++ b/examples/vision/detection/yolov6/quantize/python/README.md
@@ -17,7 +17,7 @@ git clone https://github.com/PaddlePaddle/FastDeploy.git
 cd examples/slim/yolov6/python

 #下载FastDeloy提供的yolov6s量化模型文件和测试图片
-wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_qat_model.tar
-tar -xvf yolov6s_qat_model.tar
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_qat_model_new.tar
+tar -xvf yolov6s_qat_model_new.tar
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

From 1cf54a078809212bfbb269ce678f01c2b18a053e Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Tue, 13 Dec 2022 12:50:32 +0000
Subject: [PATCH 07/30] Improve PPOCR comments

---
 fastdeploy/vision/ocr/ppocr/classifier.h | 14 ++++++++++++--
 fastdeploy/vision/ocr/ppocr/cls_postprocessor.h | 4 ++--
 fastdeploy/vision/ocr/ppocr/cls_preprocessor.h | 4 ++--
 fastdeploy/vision/ocr/ppocr/det_preprocessor.h | 2 +-
 fastdeploy/vision/ocr/ppocr/rec_postprocessor.h | 4 ++--
 fastdeploy/vision/ocr/ppocr/rec_preprocessor.h | 4 ++--
 fastdeploy/vision/ocr/ppocr/recognizer.h | 10 +++++++++-
 7 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/fastdeploy/vision/ocr/ppocr/classifier.h b/fastdeploy/vision/ocr/ppocr/classifier.h
index 5a4ed02a0..cd035e269 100755
--- a/fastdeploy/vision/ocr/ppocr/classifier.h
+++ b/fastdeploy/vision/ocr/ppocr/classifier.h
@@ -43,11 +43,21 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel {
                      const ModelFormat& model_format = ModelFormat::PADDLE);
   /// Get model's name
   std::string ModelName() const { return "ppocr/ocr_cls"; }
-  virtual bool Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score);
+
+  /** \brief Predict the input image and get OCR classification model cls_result.
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
+   * \param[in] cls_label The label result of cls model will be written into this parameter.
+   * \param[in] cls_score The score result of cls model will be written into this parameter.
+   * \return true if the prediction is successful, otherwise false.
+   */
+  virtual bool Predict(const cv::Mat& img,
+                       int32_t* cls_label, float* cls_score);
   /** \brief BatchPredict the input image and get OCR classification model cls_result.
    *
    * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
-   * \param[in] cls_results The output of OCR classification model cls_result will be writen to this structure.
+   * \param[in] cls_labels The label results of cls model will be written into this vector.
+   * \param[in] cls_scores The score results of cls model will be written into this vector.
    * \return true if the prediction is successed, otherwise false. 
*/
  virtual bool BatchPredict(const std::vector<cv::Mat>& images,

diff --git a/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h
index a755e1294..d9702e1a1 100644
--- a/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/cls_postprocessor.h
@@ -28,8 +28,8 @@ class FASTDEPLOY_DECL ClassifierPostprocessor {
  /** \brief Process the result of runtime and fill to ClassifyResult structure
   *
   * \param[in] tensors The inference result from runtime
-  * \param[in] cls_labels The output result of classification
-  * \param[in] cls_scores The output result of classification
+  * \param[in] cls_labels The output label results of classification model
+  * \param[in] cls_scores The output score results of classification model
   * \return true if the postprocess successed, otherwise false
   */
  bool Run(const std::vector<FDTensor>& tensors,

diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
index ed75d55b2..8c1c81611 100644
--- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
@@ -26,8 +26,8 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
 public:
  /** \brief Process the input image and prepare input tensors for runtime
   *
-  * \param[in] images The input image data list, all the elements are returned by cv::imread()
-  * \param[in] outputs The output tensors which will feed in runtime
+  * \param[in] images The input data list, all the elements are FDMat
+  * \param[in] outputs The output tensors which will be fed into runtime
   * \return true if the preprocess successed, otherwise false
   */
  bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs);

diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
index d66e785d3..705f19c7b 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
@@ -26,7 +26,7 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor {
 public:
  /** \brief Process the input image and prepare input tensors for runtime
   *
-  * \param[in] images The input image data list, all the elements are returned by cv::imread()
+  * \param[in] images The input data list, all the elements are FDMat
   * \param[in] outputs The output tensors which will feed in runtime
   * \param[in] batch_det_img_info_ptr The output of preprocess
   * \return true if the preprocess successed, otherwise false

diff --git a/fastdeploy/vision/ocr/ppocr/rec_postprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_postprocessor.h
index 711ae3a01..5f9aa70f2 100644
--- a/fastdeploy/vision/ocr/ppocr/rec_postprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/rec_postprocessor.h
@@ -35,8 +35,8 @@ class FASTDEPLOY_DECL RecognizerPostprocessor {
  /** \brief Process the result of runtime and fill to RecognizerResult
   *
   * \param[in] tensors The inference result from runtime
-  * \param[in] texts The output result of recognizer
-  * \param[in] rec_scores The output result of recognizer
+  * \param[in] texts The output text results of recognizer
+  * \param[in] rec_scores The output score results of recognizer
   * \return true if the postprocess successed, otherwise false
   */
  bool Run(const std::vector<FDTensor>& tensors,

diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
index 1dad75870..c6c942468 100644
--- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
@@ -26,8 +26,8 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
 public:
  /** \brief Process the input image and prepare input tensors for runtime
   *
-  * \param[in] images The input image data list, all the elements are returned by cv::imread()
-  * \param[in] outputs The output tensors which will feed in runtime
+  * \param[in] images The input data list, all the elements are FDMat
+  * \param[in] outputs The output tensors which will be fed into runtime
   * \return true if the preprocess successed, otherwise false
   */
  bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs);

diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.h b/fastdeploy/vision/ocr/ppocr/recognizer.h
index 8a5f5bc70..bba8a4447 100755
--- a/fastdeploy/vision/ocr/ppocr/recognizer.h
+++ b/fastdeploy/vision/ocr/ppocr/recognizer.h
@@ -45,11 +45,19 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel {
                      const ModelFormat& model_format = ModelFormat::PADDLE);
   /// Get model's name
   std::string ModelName() const { return "ppocr/ocr_rec"; }
+  /** \brief Predict the input image and get OCR recognition model result.
+   *
+   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
+   * \param[in] text The text result of rec model will be written into this parameter.
+   * \param[in] rec_score The score result of rec model will be written into this parameter.
+   * \return true if the prediction is successful, otherwise false.
+   */
   virtual bool Predict(const cv::Mat& img, std::string* text, float* rec_score);
   /** \brief BatchPredict the input image and get OCR recognition model result.
    *
    * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
-   * \param[in] rec_results The output of OCR recognition model result will be writen to this structure.
+   * \param[in] texts The list of text results of rec model will be written into this vector.
+   * \param[in] rec_scores The list of score results of rec model will be written into this vector.
    * \return true if the prediction is successed, otherwise false.
    */
   virtual bool BatchPredict(const std::vector<cv::Mat>& images,

From 65aacd9e78e4c296e00391ee62e720f058e17221 Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Tue, 20 Dec 2022 07:16:30 +0000
Subject: [PATCH 08/30] Add English doc for quantization

---
 docs/en/quantize.md | 175 +++++++++++++++++++++++++++++---------------
 1 file changed, 114 insertions(+), 61 deletions(-)

diff --git a/docs/en/quantize.md b/docs/en/quantize.md
index c4535808e..494d15f5b 100644
--- a/docs/en/quantize.md
+++ b/docs/en/quantize.md
@@ -1,79 +1,132 @@
-[English](../en/quantize.md) | 简体中文
+[中文](../cn/quantize.md) | English

-# 量化加速
-量化是一种流行的模型压缩方法,量化后的模型拥有更小的体积和更快的推理速度.
-FastDeploy基于PaddleSlim, 集成了一键模型量化的工具, 同时, FastDeploy支持部署量化后的模型, 帮助用户实现推理加速.
+# Quantization Acceleration
+Quantization is a popular method of model compression, resulting in smaller model size and faster inference speed.
+Based on PaddleSlim's Auto Compression Toolkit (ACT), FastDeploy provides users with a one-click model compression automation tool. This tool includes a variety of strategies for auto-compression; currently the main strategies are post-training quantization and quantaware distillation training. At the same time, FastDeploy supports the deployment of compressed models to help users achieve inference acceleration.

+## Multiple inference engines and hardware support for quantized model deployment in FastDeploy
+Currently, multiple inference engines in FastDeploy can support the deployment of quantized models on different hardware.
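As a minimal sketch of what such a deployment looks like in Python (the `yolov5s_quant` model directory is a placeholder, and the `enable_trt_fp16()` / `enable_pinned_memory()` switches are assumptions drawn from the INT8 + FP16 and Pinned Memory benchmark notes below, not lines quoted from the linked examples):

```python
import cv2
import fastdeploy as fd

# Build a RuntimeOption targeting the TensorRT backend on GPU.
option = fd.RuntimeOption()
option.use_gpu()
option.use_trt_backend()
option.enable_trt_fp16()       # INT8 + FP16 mixed inference (assumed switch)
option.enable_pinned_memory()  # faster GPU->CPU copies (assumed switch)

# Load a quantized YOLOv5s exported in Paddle format (placeholder paths).
model = fd.vision.detection.YOLOv5(
    "yolov5s_quant/model.pdmodel",
    "yolov5s_quant/model.pdiparams",
    runtime_option=option,
    model_format=fd.ModelFormat.PADDLE)

im = cv2.imread("000000014439.jpg")  # test image used elsewhere in this series
print(model.predict(im))
```

The per-model example directories linked in the tables below remain the authoritative reference for exact file names and options.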
-
-## FastDeploy 多个引擎和硬件支持量化模型部署
-当前,FastDeploy中多个推理后端可以在不同硬件上支持量化模型的部署. 支持情况如下:
+| Hardware/Inference engine | ONNX Runtime | Paddle Inference | TensorRT | Paddle-TensorRT |
+| :-----------| :-------- | :--------------- | :------- | :------- |
+| CPU | Support | Support | | |
+| GPU | | | Support | Support |

-| 硬件/推理后端 | ONNX Runtime | Paddle Inference | TensorRT |
-| :-----------| :-------- | :--------------- | :------- |
-| CPU | 支持 | 支持 | |
-| GPU | | | 支持 |

+## Model Quantization
+### Quantization Method
+Based on PaddleSlim, FastDeploy's one-click model auto-compression currently provides two quantization methods: quantaware distillation training, which obtains the quantized model through model training, and post-training quantization, which quantizes the model without any training. FastDeploy can deploy the quantized models produced by either method.

-## 模型量化
-
-### 量化方法
-基于PaddleSlim, 目前FastDeploy提供的的量化方法有量化蒸馏训练和离线量化, 量化蒸馏训练通过模型训练来获得量化模型, 离线量化不需要模型训练即可完成模型的量化. FastDeploy 对两种方式产出的量化模型均能部署.
-
-两种方法的主要对比如下表所示:
-| 量化方法 | 量化过程耗时 | 量化模型精度 | 模型体积 | 推理速度 |
+The comparison of the two methods is shown in the following table:
+| Method | Time Cost | Quantized Model Accuracy | Quantized Model Size | Inference Speed |
 | :-----------| :--------| :-------| :------- | :------- |
-| 离线量化 | 无需训练,耗时短 | 比量化蒸馏训练稍低 | 两者一致 | 两者一致 |
-| 量化蒸馏训练 | 需要训练,耗时稍高 | 较未量化模型有少量损失 | 两者一致 |两者一致 |
+| Post Training Quantization | Less than Quantaware | Lower than Quantaware | Same | Same |
+| Quantaware Distillation Training | Normal | Lower than FP32 Model | Same | Same |

-### 用户使用FastDeploy一键模型量化工具来量化模型
-Fastdeploy基于PaddleSlim, 为用户提供了一键模型量化的工具,请参考如下文档进行模型量化.
-- [FastDeploy 一键模型量化](../../tools/auto_compression/)
-当用户获得产出的量化模型之后,即可以使用FastDeploy来部署量化模型.
+
+### Use the FastDeploy one-click model automation compression tool to quantize models
+Based on PaddleSlim's Auto Compression Toolkit (ACT), FastDeploy provides users with a one-click model automation compression tool. Please refer to the following document for one-click model automation compression.
+- [FastDeploy One-Click Model Automation Compression](../../tools/common_tools/auto_compression/)
+
+## Benchmark
+Currently, FastDeploy supports automated compression; the Runtime Benchmark and End-to-End Benchmark of the models that have completed deployment testing are shown below.
+
+NOTE:
+- Runtime latency is the inference latency of the model on various Runtimes, including CPU->GPU data copy, GPU inference, and GPU->CPU data copy time. It does not include the respective pre and post processing time of the models.
+- The end-to-end latency is the latency of the model in the actual inference scenario, including the pre and post processing of the model.
+- The measured latencies are averaged over 1000 inferences, in milliseconds.
+- INT8 + FP16 means the FP16 inference option is enabled for the Runtime while running the INT8 quantized model.
+- INT8 + FP16 + PM additionally uses Pinned Memory on top of INT8 + FP16, which speeds up the GPU->CPU data copy.
+- The maximum speedup ratio is obtained by dividing the FP32 latency by the fastest INT8 inference latency.
+- The strategy is quantaware distillation training, which uses a small amount of unlabeled data to train the quantized model and verifies accuracy on the full validation set; the INT8 accuracy reported here does not represent the highest achievable INT8 accuracy.
+- The CPU is Intel(R) Xeon(R) Gold 6271C with a fixed CPU thread count of 1 in all tests. The GPU is Tesla T4, TensorRT version 8.4.15. + +### YOLO Series +#### Runtime Benchmark +| Model |Inference Backends |Hardware | FP32 Runtime Latency | INT8 Runtime Latency | INT8 + FP16 Runtime Latency | INT8+FP16+PM Runtime Latency | Max Speedup | FP32 mAP | INT8 mAP | Method | +| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | TensorRT | GPU | 7.87 | 4.51 | 4.31 | 3.17 | 2.48 | 37.6 | 36.7 | Quantaware Distillation Training | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | Paddle-TensorRT | GPU | 7.99 | None | 4.46 | 3.31 | 2.41 | 37.6 | 36.8 | Quantaware Distillation Training | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | ONNX Runtime | CPU | 176.41 | 91.90 | None | None | 1.90 | 37.6 | 33.1 |Quantaware Distillation Training | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | Paddle Inference| CPU | 213.73 | 130.19 | None | None | 1.64 |37.6 | 35.2 | Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | TensorRT | GPU | 9.47 | 3.23 | 4.09 |2.81 | 3.37 | 42.5 | 40.7|Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | Paddle-TensorRT | GPU | 9.31 | None| 4.17 | 2.95 | 3.16 | 42.5 | 40.7|Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | ONNX Runtime | CPU | 334.65 | 126.38 | None | None| 2.65 |42.5| 36.8|Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | Paddle Inference | CPU | 352.87 | 123.12 |None | None| 2.87 |42.5| 40.8|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | TensorRT | GPU | 27.47 | 6.52 | 6.74| 5.19| 5.29 | 51.1| 50.4|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | Paddle-TensorRT | GPU | 27.87|None|6.91|5.86 | 4.76 | 51.1| 50.4|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | ONNX Runtime | CPU | 996.65 | 467.15 |None|None | 2.13 | 51.1 | 43.3|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | Paddle Inference | CPU | 995.85 | 477.93|None|None | 2.08 |51.1 | 46.2|Quantaware Distillation Training | + +#### End2End Benchmark +| Model |Inference Backends |Hardware | FP32 End2End Latency | INT8 End2End Latency | INT8 + FP16 End2End Latency | INT8+FP16+PM End2End Latency | Max Speedup | FP32 mAP | INT8 mAP | Method | +| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | TensorRT | GPU | 24.61 | 21.20 | 20.78 | 20.94 | 1.18 | 37.6 | 36.7 | Quantaware Distillation Training | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | Paddle-TensorRT | GPU | 23.53 | None | 21.98 | 19.84 | 1.28 | 37.6 | 36.8 | Quantaware Distillation Training | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | ONNX Runtime | CPU | 197.323 | 110.99 | None | None | 1.78 | 37.6 | 33.1 |Quantaware Distillation Training | +| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | Paddle Inference| CPU | 235.73 | 144.82 | None | None | 1.63 |37.6 | 35.2 | Quantaware 
Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | TensorRT | GPU | 15.66 | 11.30 | 10.25 |9.59 | 1.63 | 42.5 | 40.7|Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | Paddle-TensorRT | GPU | 15.03 | None| 11.36 | 9.32 | 1.61 | 42.5 | 40.7|Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | ONNX Runtime | CPU | 348.21 | 126.38 | None | None| 2.82 |42.5| 36.8|Quantaware Distillation Training | +| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | Paddle Inference | CPU | 352.87 | 121.64 |None | None| 3.04 |42.5| 40.8|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | TensorRT | GPU | 36.47 | 18.81 | 20.33| 17.58| 2.07 | 51.1| 50.4|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | Paddle-TensorRT | GPU | 37.06|None|20.26|17.53 | 2.11 | 51.1| 50.4|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | ONNX Runtime | CPU | 988.85 | 478.08 |None|None | 2.07 | 51.1 | 43.3|Quantaware Distillation Training | +| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | Paddle Inference | CPU | 1031.73 | 500.12|None|None | 2.06 |51.1 | 46.2|Quantaware Distillation Training | -## 量化示例 -目前, FastDeploy已支持的模型量化如下表所示: -### YOLO 系列 -| 模型 |推理后端 |部署硬件 | FP32推理时延 | INT8推理时延 | 加速比 | FP32 mAP | INT8 mAP | 量化方式 | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- | -| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | TensorRT | GPU | 8.79 | 5.17 | 1.70 | 37.6 | 36.6 | 量化蒸馏训练 | -| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | ONNX Runtime | CPU | 176.34 | 92.95 | 1.90 | 37.6 | 33.1 |量化蒸馏训练 | -| [YOLOv5s](../../examples/vision/detection/yolov5/quantize/) | Paddle Inference | CPU | 217.05 | 133.31 | 1.63 |37.6 | 36.8 | 量化蒸馏训练 | -| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | TensorRT | GPU | 8.60 | 5.16 | 1.67 | 42.5 | 40.6|量化蒸馏训练 | -| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | ONNX Runtime | CPU | 338.60 | 128.58 | 2.60 |42.5| 36.1|量化蒸馏训练 | -| [YOLOv6s](../../examples/vision/detection/yolov6/quantize/) | Paddle Inference | CPU | 356.62 | 125.72 | 2.84 |42.5| 41.2|量化蒸馏训练 | -| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | TensorRT | GPU | 24.57 | 9.40 | 2.61 | 51.1| 50.8|量化蒸馏训练 | -| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | ONNX Runtime | CPU | 976.88 | 462.69 | 2.11 | 51.1 | 42.5|量化蒸馏训练 | -| [YOLOv7](../../examples/vision/detection/yolov7/quantize/) | Paddle Inference | CPU | 1022.55 | 490.87 | 2.08 |51.1 | 46.3|量化蒸馏训练 | +### PaddleClasSeries +#### Runtime Benchmark +| Model |Inference Backends |Hardware | FP32 Runtime Latency | INT8 Runtime Latency | INT8 + FP16 Runtime Latency | INT8+FP16+PM Runtime Latency | Max Speedup | FP32 Top1 | INT8 Top1 | Method | +| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | TensorRT | GPU | 3.55 | 0.99|0.98|1.06 | 3.62 | 79.12 | 79.06 | Post Training Quantization | +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | Paddle-TensorRT | GPU | 3.46 |None |0.87|1.03 | 3.98 | 79.12 | 79.06 | Post Training Quantization | +| 
[ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | ONNX Runtime | CPU | 76.14 | 35.43 |None|None | 2.15 | 79.12 | 78.87| Post Training Quantization| +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | Paddle Inference | CPU | 76.21 | 24.01 |None|None | 3.17 | 79.12 | 78.55 | Post Training Quantization| +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | TensorRT | GPU | 0.91 | 0.43 |0.49 | 0.54 | 2.12 |77.89 | 76.86 | Post Training Quantization | +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | Paddle-TensorRT | GPU | 0.88| None| 0.49|0.51 | 1.80 |77.89 | 76.86 | Post Training Quantization | +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | ONNX Runtime | CPU | 30.53 | 9.59|None|None | 3.18 |77.89 | 75.09 |Post Training Quantization | +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | Paddle Inference | CPU | 12.29 | 4.68 | None|None|2.62 |77.89 | 71.36 |Post Training Quantization | -上表中的数据, 为模型量化前后,在FastDeploy部署的Runtime推理性能. -- 测试数据为COCO2017验证集中的图片. -- 推理时延为在不同Runtime上推理的时延, 单位是毫秒. -- CPU为Intel(R) Xeon(R) Gold 6271C, GPU为Tesla T4, TensorRT版本8.4.15, 所有测试中固定CPU线程数为1. +#### End2End Benchmark +| Model |Inference Backends |Hardware | FP32 End2End Latency | INT8 End2End Latency | INT8 + FP16 End2End Latency | INT8+FP16+PM End2End Latency | Max Speedup | FP32 Top1 | INT8 Top1 | Method | +| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | TensorRT | GPU | 4.92| 2.28|2.24|2.23 | 2.21 | 79.12 | 79.06 | Post Training Quantization | +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | Paddle-TensorRT | GPU | 4.48|None |2.09|2.10 | 2.14 | 79.12 | 79.06 | Post Training Quantization | +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | ONNX Runtime | CPU | 77.43 | 41.90 |None|None | 1.85 | 79.12 | 78.87| Post Training Quantization| +| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | Paddle Inference | CPU | 80.60 | 27.75 |None|None | 2.90 | 79.12 | 78.55 | Post Training Quantization| +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | TensorRT | GPU | 2.19 | 1.48|1.57| 1.57 | 1.48 |77.89 | 76.86 | Post Training Quantization | +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | Paddle-TensorRT | GPU | 2.04| None| 1.47|1.45 | 1.41 |77.89 | 76.86 | Post Training Quantization | +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | ONNX Runtime | CPU | 34.02 | 12.97|None|None | 2.62 |77.89 | 75.09 |Post Training Quantization | +| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | Paddle Inference | CPU | 16.31 | 7.42 | None|None| 2.20 |77.89 | 71.36 |Post Training Quantization | -### PaddleDetection系列 -| 模型 |推理后端 |部署硬件 | FP32推理时延 | INT8推理时延 | 加速比 | FP32 mAP | INT8 mAP |量化方式 | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- | -| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize ) | TensorRT | GPU | 24.52 | 11.53 | 2.13 | 51.4 | 50.7 | 量化蒸馏训练 | -| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize) | ONNX Runtime | CPU | 1085.62 | 457.56 | 2.37 |51.4 | 50.0 
|量化蒸馏训练 | -上表中的数据, 为模型量化前后,在FastDeploy部署的Runtime推理性能. -- 测试图片为COCO val2017中的图片. -- 推理时延为在不同Runtime上推理的时延, 单位是毫秒. -- CPU为Intel(R) Xeon(R) Gold 6271C, GPU为Tesla T4, TensorRT版本8.4.15, 所有测试中固定CPU线程数为1. +### PaddleDetectionSeries +#### Runtime Benchmark +| Model |Inference Backends |Hardware | FP32 Runtime Latency | INT8 Runtime Latency | INT8 + FP16 Runtime Latency | INT8+FP16+PM Runtime Latency | Max Speedup | FP32 mAP | INT8 mAP | Method | +| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | +| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize ) | TensorRT | GPU | 27.90 | 6.39 |6.44|5.95 | 4.67 | 51.4 | 50.7 | Quantaware Distillation Training | +| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize ) | Paddle-TensorRT | GPU | 30.89 |None | 13.78 |14.01 | 2.24 | 51.4 | 50.5| Quantaware Distillation Training | +| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize) | ONNX Runtime | CPU | 1057.82 | 449.52 |None|None | 2.35 |51.4 | 50.0 |Quantaware Distillation Training | -### PaddleClas系列 -| 模型 |推理后端 |部署硬件 | FP32推理时延 | INT8推理时延 | 加速比 | FP32 Top1 | INT8 Top1 |量化方式 | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- | -| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | ONNX Runtime | CPU | 77.20 | 40.08 | 1.93 | 79.12 | 78.87| 离线量化| -| [ResNet50_vd](../../examples/vision/classification/paddleclas/quantize/) | TensorRT | GPU | 3.70 | 1.80 | 2.06 | 79.12 | 79.06 | 离线量化 | -| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | ONNX Runtime | CPU | 30.99 | 10.24 | 3.03 |77.89 | 75.09 |离线量化 | -| [MobileNetV1_ssld](../../examples/vision/classification/paddleclas/quantize/) | TensorRT | GPU | 1.80 | 0.58 | 3.10 |77.89 | 76.86 | 离线量化 | +#### End2End Benchmark +| Model |Inference Backends |Hardware | FP32 End2End Latency | INT8 End2End Latency | INT8 + FP16 End2End Latency | INT8+FP16+PM End2End Latency | Max Speedup | FP32 mAP | INT8 mAP | Method | +| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | +| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize ) | TensorRT | GPU | 35.75 | 15.42 |20.70|20.85 | 2.32 | 51.4 | 50.7 | Quantaware Distillation Training | +| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize ) | Paddle-TensorRT | GPU | 33.48 |None | 18.47 |18.03 | 1.81 | 51.4 | 50.5| Quantaware Distillation Training | +| [ppyoloe_crn_l_300e_coco](../../examples/vision/detection/paddledetection/quantize) | ONNX Runtime | CPU | 1067.17 | 461.037 |None|None | 2.31 |51.4 | 50.0 |Quantaware Distillation Training | -上表中的数据, 为模型量化前后,在FastDeploy部署的Runtime推理性能. -- 测试数据为ImageNet-2012验证集中的图片. -- 推理时延为在不同Runtime上推理的时延, 单位是毫秒. -- CPU为Intel(R) Xeon(R) Gold 6271C, GPU为Tesla T4, TensorRT版本8.4.15, 所有测试中固定CPU线程数为1. 
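Note: latencies in these benchmark tables are in milliseconds, and the "Max Speedup" column is consistent with the FP32 latency divided by the fastest quantized latency in the same row — e.g., YOLOv6s on TensorRT: 15.66 / 9.59 ≈ 1.63, and ResNet50_vd on TensorRT: 3.55 / 0.98 ≈ 3.62 (a reading of the numbers above, not a formula stated by the patch).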
+ +

+### PaddleSegSeries
+#### Runtime Benchmark
+| Model |Inference Backends |Hardware | FP32 Runtime Latency | INT8 Runtime Latency | INT8 + FP16 Runtime Latency | INT8+FP16+PM Runtime Latency | Max Speedup | FP32 mIoU | INT8 mIoU | Method |
+| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- |
+| [PP-LiteSeg-T(STDC1)-cityscapes](../../examples/vision/segmentation/paddleseg/quantize) | Paddle Inference | CPU | 1138.04| 602.62 |None|None | 1.89 |77.37 | 71.62 |Quantaware Distillation Training |
+
+#### End2End Benchmark
+| Model |Inference Backends |Hardware | FP32 End2End Latency | INT8 End2End Latency | INT8 + FP16 End2End Latency | INT8+FP16+PM End2End Latency | Max Speedup | FP32 mIoU | INT8 mIoU | Method |
+| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- |
+| [PP-LiteSeg-T(STDC1)-cityscapes](../../examples/vision/segmentation/paddleseg/quantize) | Paddle Inference | CPU | 4726.65| 4134.91|None|None | 1.14 |77.37 | 71.62 |Quantaware Distillation Training |

From 411baca0833e6fec6e5bcb772852c1bd43dee0104210851fd554972f Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Wed, 21 Dec 2022 07:27:56 +0000
Subject: [PATCH 09/30] Fix PPOCR rec model bug

---
 fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc
index a965eb762..598b170a6 100644
--- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc
@@ -39,7 +39,7 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
   }
   Resize::Run(mat, resize_w, img_h);

-  std::vector<float> value = {0, 0, 0};
+  std::vector<float> value = {127, 127, 127};
   Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), value);
 }

From affe91aa073e96a3f2837549ac93ab2254b92bb8 Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Thu, 22 Dec 2022 08:29:22 +0000
Subject: [PATCH 10/30] Add new paddleseg quantization support

---
 tools/common_tools/auto_compression/README.md | 5 ++-
 .../segmentation/deeplabv3_resnet_quant.yaml | 37 ++++++++++++++++++
 .../configs/segmentation/fcn_hrnet_quant.yaml | 36 ++++++++++++++++++
 .../configs/segmentation/unet_quant.yaml | 38 +++++++++++++++++++
 4 files changed, 115 insertions(+), 1 deletion(-)
 create mode 100644 tools/common_tools/auto_compression/configs/segmentation/deeplabv3_resnet_quant.yaml
 create mode 100644 tools/common_tools/auto_compression/configs/segmentation/fcn_hrnet_quant.yaml
 create mode 100644 tools/common_tools/auto_compression/configs/segmentation/unet_quant.yaml

diff --git a/tools/common_tools/auto_compression/README.md b/tools/common_tools/auto_compression/README.md
index ed0717a42..dc642ec80 100644
--- a/tools/common_tools/auto_compression/README.md
+++ b/tools/common_tools/auto_compression/README.md
@@ -109,7 +109,10 @@ FastDeploy目前为用户提供了多个模型的压缩[config](./configs/)文
 | [yolov7_quant](./configs/detection/yolov7_quant.yaml) | [yolov7](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) | |
 | [ppyoloe_withNMS_quant](./configs/detection/ppyoloe_withNMS_quant.yaml) | [ppyoloe_l](https://bj.bcebos.com/v1/paddle-slim-models/act/ppyoloe_crn_l_300e_coco.tar) | 支持PPYOLOE的s,m,l,x系列模型, 从PaddleDetection导出模型时正常导出, 不要去除NMS |
 | [ppyoloe_plus_withNMS_quant](./configs/detection/ppyoloe_plus_withNMS_quant.yaml) | [ppyoloe_plus_s](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_plus_crn_s_80e_coco.tar) | 
支持PPYOLOE+的s,m,l,x系列模型, 从PaddleDetection导出模型时正常导出, 不要去除NMS | -| [pp_liteseg_quant](./configs/segmentation/pp_liteseg_quant.yaml) | [pp_liteseg](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz) | | +| [pp_liteseg_quant](./configs/segmentation/pp_liteseg_quant.yaml) | [pp_liteseg](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz) | +| [deeplabv3_resnet_quant](./configs/segmentation/deeplabv3_resnet_quant.yaml) | [deeplabv3_resnet101](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | | +| [fcn_hrnet_quant](./configs/segmentation/fcn_hrnet_quant.yaml) | [fcn_hrnet](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz) | | +| [unet_quant](./configs/segmentation/unet_quant.yaml) | [unet](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | | | diff --git a/tools/common_tools/auto_compression/configs/segmentation/deeplabv3_resnet_quant.yaml b/tools/common_tools/auto_compression/configs/segmentation/deeplabv3_resnet_quant.yaml new file mode 100644 index 000000000..2c473cd56 --- /dev/null +++ b/tools/common_tools/auto_compression/configs/segmentation/deeplabv3_resnet_quant.yaml @@ -0,0 +1,37 @@ +Global: + model_dir: ./Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer/ + format: paddle + model_filename: model.pdmodel + params_filename: model.pdiparams + qat_image_path: ./train_stuttgart + ptq_image_path: ./train_stuttgart + input_list: ['x'] + qat_preprocess: ppseg_cityscapes_qat_preprocess + ptq_preprocess: ppseg_cityscapes_ptq_preprocess + qat_batch_size: 2 + + +Distillation: + alpha: 1.0 + loss: l2 + node: + - conv2d_225.tmp_0 + +QuantAware: + onnx_format: True + quantize_op_types: + - conv2d + - depthwise_conv2d + +TrainConfig: + epochs: 1 + eval_iter: 360 + learning_rate: 0.0001 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 0.0005 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None diff --git a/tools/common_tools/auto_compression/configs/segmentation/fcn_hrnet_quant.yaml b/tools/common_tools/auto_compression/configs/segmentation/fcn_hrnet_quant.yaml new file mode 100644 index 000000000..b1f3fde3d --- /dev/null +++ b/tools/common_tools/auto_compression/configs/segmentation/fcn_hrnet_quant.yaml @@ -0,0 +1,36 @@ +Global: + model_dir: ./FCN_HRNet_W18_cityscapes_without_argmax_infer + format: paddle + model_filename: model.pdmodel + params_filename: model.pdiparams + qat_image_path: ./train_stuttgart + ptq_image_path: ./train_stuttgart + input_list: ['x'] + qat_preprocess: ppseg_cityscapes_qat_preprocess + ptq_preprocess: ppseg_cityscapes_ptq_preprocess + qat_batch_size: 2 + +Distillation: + alpha: 1.0 + loss: l2 + node: + - conv2d_613.tmp_1 + +QuantAware: + onnx_format: True + quantize_op_types: + - conv2d + - depthwise_conv2d + +TrainConfig: + epochs: 20 + eval_iter: 360 + learning_rate: 0.0001 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 4.0e-05 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None diff --git a/tools/common_tools/auto_compression/configs/segmentation/unet_quant.yaml b/tools/common_tools/auto_compression/configs/segmentation/unet_quant.yaml new file mode 100644 index 000000000..dff13f936 --- /dev/null +++ b/tools/common_tools/auto_compression/configs/segmentation/unet_quant.yaml @@ -0,0 +1,38 @@ +Global: + model_dir: 
./Unet_cityscapes_without_argmax_infer/ + format: paddle + model_filename: model.pdmodel + params_filename: model.pdiparams + qat_image_path: ./train_stuttgart + ptq_image_path: ./train_stuttgart + input_list: ['x'] + qat_preprocess: ppseg_cityscapes_qat_preprocess + ptq_preprocess: ppseg_cityscapes_ptq_preprocess + qat_batch_size: 2 + + +Distillation: + alpha: 1.0 + loss: l2 + node: + - conv2d_37.tmp_1 + +QuantAware: + onnx_format: True + quantize_op_types: + - conv2d + - depthwise_conv2d + + +TrainConfig: + epochs: 10 + eval_iter: 180 + learning_rate: 0.0005 + optimizer_builder: + optimizer: + type: SGD + weight_decay: 4.0e-05 + +PTQ: + calibration_method: 'avg' # option: avg, abs_max, hist, KL, mse + skip_tensor_list: None From 97328f9d2114b837caf7a306391ab4998f4812b5 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Thu, 22 Dec 2022 08:32:24 +0000 Subject: [PATCH 11/30] Add new paddleseg quantization support --- tools/common_tools/auto_compression/README_EN.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/common_tools/auto_compression/README_EN.md b/tools/common_tools/auto_compression/README_EN.md index c3b0cfee6..fc9e8caba 100644 --- a/tools/common_tools/auto_compression/README_EN.md +++ b/tools/common_tools/auto_compression/README_EN.md @@ -117,6 +117,10 @@ FastDeploy currently provides users with compression [config](./configs/) files | [ppyoloe_withNMS_quant](./configs/detection/ppyoloe_withNMS_quant.yaml) | [ppyoloe_l](https://bj.bcebos.com/v1/paddle-slim-models/act/ppyoloe_crn_l_300e_coco.tar) | Support PPYOLOE's s,m,l,x series models, export the model normally when exporting the model from PaddleDetection, do not remove NMS | | [ppyoloe_plus_withNMS_quant](./configs/detection/ppyoloe_plus_withNMS_quant.yaml) | [ppyoloe_plus_s](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_plus_crn_s_80e_coco.tar) | Support PPYOLOE+'s s,m,l,x series models, export the model normally when exporting the model from PaddleDetection, do not remove NMS | | [pp_liteseg_quant](./configs/segmentation/pp_liteseg_quant.yaml) | [pp_liteseg](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz) | | +| [deeplabv3_resnet_quant](./configs/segmentation/deeplabv3_resnet_quant.yaml) | [deeplabv3_resnet101](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | | +| [fcn_hrnet_quant](./configs/segmentation/fcn_hrnet_quant.yaml) | [fcn_hrnet](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz) | | +| [unet_quant](./configs/segmentation/unet_quant.yaml) | [unet](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | | | + ## 3. 
Deploy quantized models on FastDeploy From f2747a4c7446a7b65a2060dd8e2a9e12412e2537 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Thu, 22 Dec 2022 08:34:09 +0000 Subject: [PATCH 12/30] Add new paddleseg quantization support --- tools/common_tools/auto_compression/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/common_tools/auto_compression/README.md b/tools/common_tools/auto_compression/README.md index dc642ec80..8bc9d9312 100644 --- a/tools/common_tools/auto_compression/README.md +++ b/tools/common_tools/auto_compression/README.md @@ -103,7 +103,6 @@ FastDeploy目前为用户提供了多个模型的压缩[config](./configs/)文 | [mobilenetv3_large_x1_0_quant](./configs/classification/mobilenetv3_large_x1_0_quant.yaml) | [mobilenetv3_large_x1_0](https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV3_large_x1_0_ssld_infer.tgz) | | | [pphgnet_tiny_quant](./configs/classification/pphgnet_tiny_quant.yaml) | [pphgnet_tiny](https://bj.bcebos.com/paddlehub/fastdeploy/PPHGNet_tiny_ssld_infer.tgz) | | | [pplcnetv2_base_quant](./configs/classification/pplcnetv2_base_quant.yaml) | [pplcnetv2_base](https://bj.bcebos.com/paddlehub/fastdeploy/PPLCNetV2_base_infer.tgz) | | - | [yolov5s_quant](./configs/detection/yolov5s_quant.yaml) | [yolov5s](https://paddle-slim-models.bj.bcebos.com/act/yolov5s.onnx) | | | [yolov6s_quant](./configs/detection/yolov6s_quant.yaml) | [yolov6s](https://paddle-slim-models.bj.bcebos.com/act/yolov6s.onnx) | | | [yolov7_quant](./configs/detection/yolov7_quant.yaml) | [yolov7](https://paddle-slim-models.bj.bcebos.com/act/yolov7.onnx) | | From 27e0bd711fa58fc67e56e67394f40f2f7b8b6eee Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Thu, 22 Dec 2022 08:35:46 +0000 Subject: [PATCH 13/30] Add new paddleseg quantization support --- tools/common_tools/auto_compression/README_EN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/common_tools/auto_compression/README_EN.md b/tools/common_tools/auto_compression/README_EN.md index fc9e8caba..564e235c3 100644 --- a/tools/common_tools/auto_compression/README_EN.md +++ b/tools/common_tools/auto_compression/README_EN.md @@ -103,7 +103,7 @@ To complete the quantization, developers only need to provide a customized model ## 3. FastDeploy One-Click Model Auto Compression Config file examples FastDeploy currently provides users with compression [config](./configs/) files of multiple models, and the corresponding FP32 model, Users can directly download and experience it. 
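Once a config from the table below has been run through the one-click compression tool, the resulting quantized model directory loads through the regular FastDeploy API. A minimal sketch, assuming a UNet model compressed with the segmentation config above — the `unet_ptq_model/` directory and image path are placeholder names, and the backend choice depends on the target hardware:

```python
# Sketch only: deploying an INT8 PaddleSeg model produced by one of the
# segmentation configs listed here. "unet_ptq_model/" is a placeholder
# output directory from the compression tool, not a path from this repo.
import cv2
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_paddle_backend()  # Paddle Inference, the backend used in the CPU INT8 benchmarks

model = fd.vision.segmentation.PaddleSegModel(
    "unet_ptq_model/model.pdmodel",
    "unet_ptq_model/model.pdiparams",
    "unet_ptq_model/deploy.yaml",
    runtime_option=option)

result = model.predict(cv2.imread("cityscapes_demo.png"))  # any test image
print(result)
```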
-| Config文件 | 待压缩的FP32模型 | 备注 | +| Config file | FP32 model | Note | | -------------------- | ------------------------------------------------------------ |----------------------------------------- | | [mobilenetv1_ssld_quant](./configs/classification/mobilenetv1_ssld_quant.yaml) | [mobilenetv1_ssld](https://bj.bcebos.com/paddlehub/fastdeploy/MobileNetV1_ssld_infer.tgz) | | | [resnet50_vd_quant](./configs/classification/resnet50_vd_quant.yaml) | [resnet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz) | | From 2a1ac10b9ba26644f387e38f3a275deeface2490 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:11:42 +0000 Subject: [PATCH 14/30] Add Ascend model list --- README_CN.md | 128 +++++++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/README_CN.md b/README_CN.md index 487fa1e68..1c2b7e523 100755 --- a/README_CN.md +++ b/README_CN.md @@ -188,73 +188,73 @@ int main(int argc, char* argv[]) { | 任务场景 | 模型 | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | |:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:| -| --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU | Serving | -| Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | +| --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU | Huawei Ascend| Serving | +| Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | +| Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | | Classification | [ltralytics/YOLOv5Cls](examples/vision/classification/yolov5cls) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Classification | [PaddleClas/PP-LCNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/PP-LCNetv2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/EfficientNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/GhostNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/MobileNetV1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/MobileNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/MobileNetV3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | 
[PaddleClas/ShuffleNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLOE](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/YOLOX](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/YOLOv3](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLO](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLOv2](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/Faster-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/Mask-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [Megvii-BaseDetection/YOLOX](./examples/vision/detection/yolox) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Detection | [WongKinYiu/YOLOv7](./examples/vision/detection/yolov7) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Detection | [WongKinYiu/YOLOv7end2end_trt](./examples/vision/detection/yolov7end2end_trt) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Detection | [WongKinYiu/YOLOv7end2end_ort_](./examples/vision/detection/yolov7end2end_ort) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Detection | [meituan/YOLOv6](./examples/vision/detection/yolov6) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | +| Classification | [PaddleClas/PP-LCNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/PP-LCNetv2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/EfficientNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/GhostNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/MobileNetV1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/MobileNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/MobileNetV3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/ShuffleNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 
✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Detection | [PaddleDetection/PP-YOLOE](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | +| Detection | [PaddleDetection/YOLOX](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/YOLOv3](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/PP-YOLO](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/PP-YOLOv2](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/Faster-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | ✅ | +| Detection | [PaddleDetection/Mask-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | ✅ | +| Detection | [Megvii-BaseDetection/YOLOX](./examples/vision/detection/yolox) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Detection | [WongKinYiu/YOLOv7](./examples/vision/detection/yolov7) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Detection | [WongKinYiu/YOLOv7end2end_trt](./examples/vision/detection/yolov7end2end_trt) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | +| Detection | [WongKinYiu/YOLOv7end2end_ort_](./examples/vision/detection/yolov7end2end_ort) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| Detection | [meituan/YOLOv6](./examples/vision/detection/yolov6) | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | | Detection | [ultralytics/YOLOv5](./examples/vision/detection/yolov5) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [WongKinYiu/YOLOR](./examples/vision/detection/yolor) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Detection | [WongKinYiu/ScaledYOLOv4](./examples/vision/detection/scaledyolov4) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Detection | [ppogg/YOLOv5Lite](./examples/vision/detection/yolov5lite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ? 
| ❔ |❔ | -| Detection | [RangiLyu/NanoDetPlus](./examples/vision/detection/nanodet_plus) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| KeyPoint | [PaddleDetection/TinyPose](./examples/vision/keypointdetection/tiny_pose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| KeyPoint | [PaddleDetection/PicoDet + TinyPose](./examples/vision/keypointdetection/det_keypoint_unite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| HeadPose | [omasaht/headpose](examples/vision/headpose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Tracking | [PaddleDetection/PP-Tracking](examples/vision/tracking/pptracking) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| OCR | [PaddleOCR/PP-OCRv2](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| OCR | [PaddleOCR/PP-OCRv3](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Segmentation | [PaddleSeg/PP-LiteSeg](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/PP-HumanSegLite](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/HRNet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/PP-HumanSegServer](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/Unet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | ❔ | -| Segmentation | [PaddleSeg/Deeplabv3](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| FaceDetection | [biubug6/RetinaFace](./examples/vision/facedet/retinaface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceDetection | [Linzaer/UltraFace](./examples/vision/facedet/ultraface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceDetection | [deepcam-cn/YOLOv5Face](./examples/vision/facedet/yolov5face) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceDetection | [insightface/SCRFD](./examples/vision/facedet/scrfd) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceAlign | [Hsintao/PFLD](examples/vision/facealign/pfld) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | -| FaceAlign | [Single430/FaceLandmark1000](./examples/vision/facealign/face_landmark_1000) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| FaceAlign | [jhb86253817/PIPNet](./examples/vision/facealign) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | -| FaceRecognition | [insightface/ArcFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | +| Detection | [WongKinYiu/YOLOR](./examples/vision/detection/yolor) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ✅ | ❔ | +| Detection | [WongKinYiu/ScaledYOLOv4](./examples/vision/detection/scaledyolov4) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| Detection | [ppogg/YOLOv5Lite](./examples/vision/detection/yolov5lite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ? 
| ❔ |❔ |❔ | +| Detection | [RangiLyu/NanoDetPlus](./examples/vision/detection/nanodet_plus) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| KeyPoint | [PaddleDetection/TinyPose](./examples/vision/keypointdetection/tiny_pose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | ❔ | +| KeyPoint | [PaddleDetection/PicoDet + TinyPose](./examples/vision/keypointdetection/det_keypoint_unite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ |❔ | +| HeadPose | [omasaht/headpose](examples/vision/headpose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| Tracking | [PaddleDetection/PP-Tracking](examples/vision/tracking/pptracking) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| OCR | [PaddleOCR/PP-OCRv2](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | | ❔ | +| OCR | [PaddleOCR/PP-OCRv3](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Segmentation | [PaddleSeg/PP-LiteSeg](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |❔ | ❔ | +| Segmentation | [PaddleSeg/PP-HumanSegLite](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | ❔ | +| Segmentation | [PaddleSeg/HRNet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ✅ |❔ | +| Segmentation | [PaddleSeg/PP-HumanSegServer](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ✅ |❔ | +| Segmentation | [PaddleSeg/Unet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | ✅ |❔ | +| Segmentation | [PaddleSeg/Deeplabv3](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ✅ |❔ | +| FaceDetection | [biubug6/RetinaFace](./examples/vision/facedet/retinaface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | +| FaceDetection | [Linzaer/UltraFace](./examples/vision/facedet/ultraface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceDetection | [deepcam-cn/YOLOv5Face](./examples/vision/facedet/yolov5face) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceDetection | [insightface/SCRFD](./examples/vision/facedet/scrfd) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceAlign | [Hsintao/PFLD](examples/vision/facealign/pfld) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceAlign | [Single430/FaceLandmark1000](./examples/vision/facealign/face_landmark_1000) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | ❔ | +| FaceAlign | [jhb86253817/PIPNet](./examples/vision/facealign) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceRecognition | [insightface/ArcFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | | FaceRecognition | [insightface/CosFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceRecognition | [insightface/PartialFC](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceRecognition | [insightface/VPL](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Matting | [ZHKKKe/MODNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | -| Matting | [PeterL1n/RobustVideoMatting]() | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Matting | [PaddleSeg/PP-Matting](./examples/vision/matting/ppmatting) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Matting | [PaddleSeg/PP-HumanMatting](./examples/vision/matting/modnet) | ✅ 
| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | -| Matting | [PaddleSeg/ModNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Video Super-Resolution | [PaddleGAN/BasicVSR](./) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Video Super-Resolution | [PaddleGAN/EDVR](./examples/vision/sr/edvr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Video Super-Resolution | [PaddleGAN/PP-MSVSR](./examples/vision/sr/ppmsvsr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Information Extraction | [PaddleNLP/UIE](./examples/text/uie) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | | -| NLP | [PaddleNLP/ERNIE-3.0](./examples/text/ernie-3.0) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ✅ | ✅ | -| Speech | [PaddleSpeech/PP-TTS](./examples/audio/pp-tts) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | -- |❔ | ✅ | +| FaceRecognition | [insightface/PartialFC](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | +| FaceRecognition | [insightface/VPL](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | +| Matting | [ZHKKKe/MODNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| Matting | [PeterL1n/RobustVideoMatting]() | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | ❔ | +| Matting | [PaddleSeg/PP-Matting](./examples/vision/matting/ppmatting) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Matting | [PaddleSeg/PP-HumanMatting](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ |❔ | +| Matting | [PaddleSeg/ModNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| Video Super-Resolution | [PaddleGAN/BasicVSR](./) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| Video Super-Resolution | [PaddleGAN/EDVR](./examples/vision/sr/edvr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | +| Video Super-Resolution | [PaddleGAN/PP-MSVSR](./examples/vision/sr/ppmsvsr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | +| Information Extraction | [PaddleNLP/UIE](./examples/text/uie) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | | +| NLP | [PaddleNLP/ERNIE-3.0](./examples/text/ernie-3.0) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ✅ |❔ | ✅ | +| Speech | [PaddleSpeech/PP-TTS](./examples/audio/pp-tts) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | -- |❔ |❔ | ✅ | From 64d67dadfbb0e09ef7bdaca1bad18c62468257f6 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:13:30 +0000 Subject: [PATCH 15/30] Add ascend model list --- README_CN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README_CN.md b/README_CN.md index 1c2b7e523..79503e9fd 100755 --- a/README_CN.md +++ b/README_CN.md @@ -186,8 +186,8 @@ int main(int argc, char* argv[]) { -| 任务场景 | 模型 | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | -|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:| +| 任务场景 | 模型 | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | Linux | 
+|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:| | --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU | Huawei Ascend| Serving | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | | Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | From 6286ad2348508d0b008fe21ea0044838461593c0 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:17:50 +0000 Subject: [PATCH 16/30] Add ascend model list --- README_CN.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README_CN.md b/README_CN.md index 79503e9fd..1d2e72eec 100755 --- a/README_CN.md +++ b/README_CN.md @@ -191,7 +191,7 @@ int main(int argc, char* argv[]) { | --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU | Huawei Ascend| Serving | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | | Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | -| Classification | [ltralytics/YOLOv5Cls](examples/vision/classification/yolov5cls) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | +| Classification | [ltralytics/YOLOv5Cls](examples/vision/classification/yolov5cls) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ |❔ | | Classification | [PaddleClas/PP-LCNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | | Classification | [PaddleClas/PP-LCNetv2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | | Classification | [PaddleClas/EfficientNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | @@ -216,7 +216,7 @@ int main(int argc, char* argv[]) { | Detection | [WongKinYiu/YOLOv7end2end_trt](./examples/vision/detection/yolov7end2end_trt) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | | Detection | [WongKinYiu/YOLOv7end2end_ort_](./examples/vision/detection/yolov7end2end_ort) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | | Detection | [meituan/YOLOv6](./examples/vision/detection/yolov6) | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Detection | [ultralytics/YOLOv5](./examples/vision/detection/yolov5) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | +| Detection | [ultralytics/YOLOv5](./examples/vision/detection/yolov5) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ |✅ | | Detection | [WongKinYiu/YOLOR](./examples/vision/detection/yolor) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ✅ | ❔ | | Detection | [WongKinYiu/ScaledYOLOv4](./examples/vision/detection/scaledyolov4) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | | Detection | [ppogg/YOLOv5Lite](./examples/vision/detection/yolov5lite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ? 
| ❔ |❔ |❔ | @@ -225,7 +225,7 @@ int main(int argc, char* argv[]) { | KeyPoint | [PaddleDetection/PicoDet + TinyPose](./examples/vision/keypointdetection/det_keypoint_unite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ |❔ | | HeadPose | [omasaht/headpose](examples/vision/headpose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | | Tracking | [PaddleDetection/PP-Tracking](examples/vision/tracking/pptracking) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | -| OCR | [PaddleOCR/PP-OCRv2](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | | ❔ | +| OCR | [PaddleOCR/PP-OCRv2](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | ❔ | | OCR | [PaddleOCR/PP-OCRv3](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | | Segmentation | [PaddleSeg/PP-LiteSeg](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |❔ | ❔ | | Segmentation | [PaddleSeg/PP-HumanSegLite](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | ❔ | @@ -241,7 +241,7 @@ int main(int argc, char* argv[]) { | FaceAlign | [Single430/FaceLandmark1000](./examples/vision/facealign/face_landmark_1000) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | ❔ | | FaceAlign | [jhb86253817/PIPNet](./examples/vision/facealign) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | | FaceRecognition | [insightface/ArcFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | -| FaceRecognition | [insightface/CosFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | +| FaceRecognition | [insightface/CosFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | | FaceRecognition | [insightface/PartialFC](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | | FaceRecognition | [insightface/VPL](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | | Matting | [ZHKKKe/MODNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | From 22d9fcee90cf79fc0c95c474e1cca0a9c7c80e86 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:19:27 +0000 Subject: [PATCH 17/30] Add ascend model list --- README_EN.md | 139 ++++++++++++++++++++++++++------------------------- 1 file changed, 70 insertions(+), 69 deletions(-) diff --git a/README_EN.md b/README_EN.md index 639d07f67..e132e40ee 100755 --- a/README_EN.md +++ b/README_EN.md @@ -187,74 +187,75 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava | Task | Model | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | -|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:------------------------:|:------------------------:|:------------------------:|:-----------------------:|:---------------------:|:--------------------------:| :----------------: |:---------------------------:|:---------------------------:|:-------:|:-------:| -| --- | --- | X86 CPU | NVIDIA GPU | Intel CPU | NVIDIA GPU | Intel CPU | Arm CPU | AArch64 CPU | Phytium D2000CPU | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU |Serving | -| Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ 
| ✅ | ✅ | -| Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Classification | [ltralytics/YOLOv5Cls](examples/vision/classification/yolov5cls) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Classification | [PaddleClas/PP-LCNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/PP-LCNetv2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/EfficientNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/GhostNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/MobileNetV1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/MobileNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/MobileNetV3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/ShuffleNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLOE](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/YOLOX](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/YOLOv3](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLO](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLOv2](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/Faster-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [PaddleDetection/Mask-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [Megvii-BaseDetection/YOLOX](./examples/vision/detection/yolox) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Detection | [WongKinYiu/YOLOv7](./examples/vision/detection/yolov7) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Detection | [WongKinYiu/YOLOv7end2end_trt](./examples/vision/detection/yolov7end2end_trt) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Detection | [WongKinYiu/YOLOv7end2end_ort_](./examples/vision/detection/yolov7end2end_ort) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Detection | [meituan/YOLOv6](./examples/vision/detection/yolov6) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | 
❔ | -| Detection | [ultralytics/YOLOv5](./examples/vision/detection/yolov5) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Detection | [WongKinYiu/YOLOR](./examples/vision/detection/yolor) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Detection | [WongKinYiu/ScaledYOLOv4](./examples/vision/detection/scaledyolov4) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Detection | [ppogg/YOLOv5Lite](./examples/vision/detection/yolov5lite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Detection | [RangiLyu/NanoDetPlus](./examples/vision/detection/nanodet_plus) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| KeyPoint | [PaddleDetection/TinyPose](./examples/vision/keypointdetection/tiny_pose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| KeyPoint | [PaddleDetection/PicoDet + TinyPose](./examples/vision/keypointdetection/det_keypoint_unite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| HeadPose | [omasaht/headpose](examples/vision/headpose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Tracking | [PaddleDetection/PP-Tracking](examples/vision/tracking/pptracking) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| OCR | [PaddleOCR/PP-OCRv2](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| OCR | [PaddleOCR/PP-OCRv3](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | -| Segmentation | [PaddleSeg/PP-LiteSeg](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/PP-HumanSegLite](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/HRNet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/PP-HumanSegServer](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/Unet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| Segmentation | [PaddleSeg/Deeplabv3](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ❔ | -| FaceDetection | [biubug6/RetinaFace](./examples/vision/facedet/retinaface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceDetection | [Linzaer/UltraFace](./examples/vision/facedet/ultraface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceDetection | [deepcam-cn/YOLOv5Face](./examples/vision/facedet/yolov5face) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceDetection | [insightface/SCRFD](./examples/vision/facedet/scrfd) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceAlign | [Hsintao/PFLD](examples/vision/facealign/pfld) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceAlign | [Single430FaceLandmark1000](./examples/vision/facealign/face_landmark_1000) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| FaceAlign | [jhb86253817/PIPNet](./examples/vision/facealign) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| FaceRecognition | [insightface/ArcFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceRecognition | [insightface/CosFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceRecognition | [insightface/PartialFC](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| FaceRecognition | [insightface/VPL](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ 
| ❔ | ❔ | ❔ | -| Matting | [ZHKKKe/MODNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Matting | [PeterL1n/RobustVideoMatting]() | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Matting | [PaddleSeg/PP-Matting](./examples/vision/matting/ppmatting) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Matting | [PaddleSeg/PP-HumanMatting](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | -| Matting | [PaddleSeg/ModNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | -| Video Super-Resolution | [PaddleGAN/BasicVSR](./) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Video Super-Resolution | [PaddleGAN/EDVR](./examples/vision/sr/edvr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Video Super-Resolution | [PaddleGAN/PP-MSVSR](./examples/vision/sr/ppmsvsr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | -| Information Extraction | [PaddleNLP/UIE](./examples/text/uie) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | | -| NLP | [PaddleNLP/ERNIE-3.0](./examples/text/ernie-3.0) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ✅ | ✅ | -| Speech | [PaddleSpeech/PP-TTS](./examples/audio/pp-tts) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | -- | ❔ | ✅ | +|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:| +| --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU | Huawei Ascend| Serving | +| Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | +| Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Classification | [ltralytics/YOLOv5Cls](examples/vision/classification/yolov5cls) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ |❔ | +| Classification | [PaddleClas/PP-LCNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/PP-LCNetv2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/EfficientNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/GhostNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/MobileNetV1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/MobileNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/MobileNetV3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/ShuffleNetV2](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Classification | 
[PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | +| Detection | [PaddleDetection/PP-YOLOE](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | +| Detection | [PaddleDetection/YOLOX](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/YOLOv3](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/PP-YOLO](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/PP-YOLOv2](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | +| Detection | [PaddleDetection/Faster-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | ✅ | +| Detection | [PaddleDetection/Mask-RCNN](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | ✅ | +| Detection | [Megvii-BaseDetection/YOLOX](./examples/vision/detection/yolox) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Detection | [WongKinYiu/YOLOv7](./examples/vision/detection/yolov7) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Detection | [WongKinYiu/YOLOv7end2end_trt](./examples/vision/detection/yolov7end2end_trt) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | +| Detection | [WongKinYiu/YOLOv7end2end_ort_](./examples/vision/detection/yolov7end2end_ort) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| Detection | [meituan/YOLOv6](./examples/vision/detection/yolov6) | ✅ | ✅ | ✅ |✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | +| Detection | [ultralytics/YOLOv5](./examples/vision/detection/yolov5) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ |✅ | +| Detection | [WongKinYiu/YOLOR](./examples/vision/detection/yolor) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ✅ | ❔ | +| Detection | [WongKinYiu/ScaledYOLOv4](./examples/vision/detection/scaledyolov4) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| Detection | [ppogg/YOLOv5Lite](./examples/vision/detection/yolov5lite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ? 
| ❔ |❔ |❔ | +| Detection | [RangiLyu/NanoDetPlus](./examples/vision/detection/nanodet_plus) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| KeyPoint | [PaddleDetection/TinyPose](./examples/vision/keypointdetection/tiny_pose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |❔ | ❔ | +| KeyPoint | [PaddleDetection/PicoDet + TinyPose](./examples/vision/keypointdetection/det_keypoint_unite) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ |❔ | +| HeadPose | [omasaht/headpose](examples/vision/headpose) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| Tracking | [PaddleDetection/PP-Tracking](examples/vision/tracking/pptracking) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| OCR | [PaddleOCR/PP-OCRv2](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | ❔ | +| OCR | [PaddleOCR/PP-OCRv3](./examples/vision/ocr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Segmentation | [PaddleSeg/PP-LiteSeg](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |❔ | ❔ | +| Segmentation | [PaddleSeg/PP-HumanSegLite](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ |✅ | ❔ | +| Segmentation | [PaddleSeg/HRNet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ✅ |❔ | +| Segmentation | [PaddleSeg/PP-HumanSegServer](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ✅ |❔ | +| Segmentation | [PaddleSeg/Unet](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ✅ | ✅ | ✅ |❔ | +| Segmentation | [PaddleSeg/Deeplabv3](./examples/vision/segmentation/paddleseg) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | ✅ |❔ | +| FaceDetection | [biubug6/RetinaFace](./examples/vision/facedet/retinaface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | +| FaceDetection | [Linzaer/UltraFace](./examples/vision/facedet/ultraface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceDetection | [deepcam-cn/YOLOv5Face](./examples/vision/facedet/yolov5face) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceDetection | [insightface/SCRFD](./examples/vision/facedet/scrfd) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceAlign | [Hsintao/PFLD](examples/vision/facealign/pfld) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceAlign | [Single430/FaceLandmark1000](./examples/vision/facealign/face_landmark_1000) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | ❔ | +| FaceAlign | [jhb86253817/PIPNet](./examples/vision/facealign) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceRecognition | [insightface/ArcFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceRecognition | [insightface/CosFace](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ |❔ | +| FaceRecognition | [insightface/PartialFC](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | +| FaceRecognition | [insightface/VPL](./examples/vision/faceid/insightface) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ❔ | +| Matting | [ZHKKKe/MODNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| Matting | [PeterL1n/RobustVideoMatting]() | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ | ❔ | +| Matting | [PaddleSeg/PP-Matting](./examples/vision/matting/ppmatting) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | +| Matting | 
[PaddleSeg/PP-HumanMatting](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ |❔ | +| Matting | [PaddleSeg/ModNet](./examples/vision/matting/modnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ |❔ | ❔ | +| Video Super-Resolution | [PaddleGAN/BasicVSR](./) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ | ❔ |❔ | +| Video Super-Resolution | [PaddleGAN/EDVR](./examples/vision/sr/edvr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | +| Video Super-Resolution | [PaddleGAN/PP-MSVSR](./examples/vision/sr/ppmsvsr) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | +| Information Extraction | [PaddleNLP/UIE](./examples/text/uie) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | | +| NLP | [PaddleNLP/ERNIE-3.0](./examples/text/ernie-3.0) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | ❔ | ✅ |❔ | ✅ | +| Speech | [PaddleSpeech/PP-TTS](./examples/audio/pp-tts) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ❔ | -- |❔ |❔ | ✅ | + @@ -319,7 +320,7 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava | OCR | [PaddleOCR/PP-OCRv3](./examples/application/js/web_demo/src/pages/cv/ocr) | ✅ | - + ## 👬 Community - **Slack**:Join our [Slack community](https://join.slack.com/t/fastdeployworkspace/shared_invite/zt-1jznah134-3rxY~ytRb8rcPqkn9g~PDg) and chat with other community members about ideas. From ab9dc22640eb015c66cec51ff0715d29e6e526fe Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:22:39 +0000 Subject: [PATCH 18/30] Add ascend model list --- README_EN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_EN.md b/README_EN.md index e132e40ee..67fe5c443 100755 --- a/README_EN.md +++ b/README_EN.md @@ -186,7 +186,7 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava -| Task | Model | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | +| Task | Model | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux |Linux | |:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:| | --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | NVIDIA Jetson | Graphcore IPU | KunlunXin XPU | Huawei Ascend| Serving | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | From 1f6b9eff71d32c0161d921cdc3b407a6367d9197 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:42:03 +0000 Subject: [PATCH 19/30] Add ascend model list --- README_CN.md | 2 +- README_EN.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README_CN.md b/README_CN.md index 778970f41..0962978bb 100755 --- a/README_CN.md +++ b/README_CN.md @@ -187,7 +187,7 @@ int main(int argc, char* argv[]) { | 任务场景 | 模型 | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | Linux | -|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|| 
+|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|:-------:| | --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | [NVIDIA Jetson](./docs/cn/build_and_install/jetson.md) | [Graphcore IPU](./docs/cn/build_and_install/ipu.md) | [KunlunXin XPU](./docs/cn/build_and_install/kunlunxin.md) |[Huawei Ascend](./docs/cn/build_and_install/huawei_ascend.md) | [Serving](./serving) | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | | Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | diff --git a/README_EN.md b/README_EN.md index 0f1cef5aa..355567cb6 100755 --- a/README_EN.md +++ b/README_EN.md @@ -187,7 +187,7 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava | Task | Model | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux |Linux | -|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|| +|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|:-------:| | --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | [NVIDIA Jetson](./docs/cn/build_and_install/jetson.md) | [Graphcore IPU](./docs/cn/build_and_install/ipu.md) | [KunlunXin XPU](./docs/cn/build_and_install/kunlunxin.md) |[Huawei Ascend](./docs/en/build_and_install/huawei_ascend.md) | [Serving](./serving) | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | | Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | From d256bd75d1ea81dd99e885fa9136a76104efe751 Mon Sep 17 00:00:00 2001 From: yunyaoXYY Date: Wed, 28 Dec 2022 11:44:47 +0000 Subject: [PATCH 20/30] Add ascend model list --- README_CN.md | 2 +- README_EN.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README_CN.md b/README_CN.md index 0962978bb..a0988f297 100755 --- a/README_CN.md +++ b/README_CN.md @@ -187,7 +187,7 @@ int main(int argc, char* argv[]) { | 任务场景 | 模型 | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux | Linux | 
-|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|:-------:| +|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:| | --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | [NVIDIA Jetson](./docs/cn/build_and_install/jetson.md) | [Graphcore IPU](./docs/cn/build_and_install/ipu.md) | [KunlunXin XPU](./docs/cn/build_and_install/kunlunxin.md) |[Huawei Ascend](./docs/cn/build_and_install/huawei_ascend.md) | [Serving](./serving) | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | | Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | diff --git a/README_EN.md b/README_EN.md index 355567cb6..8cb40391c 100755 --- a/README_EN.md +++ b/README_EN.md @@ -187,8 +187,8 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava | Task | Model | Linux | Linux | Win | Win | Mac | Mac | Linux | Linux | Linux | Linux | Linux | Linux |Linux | -|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|:-------:| -| --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | 飞腾D2000 aarch64 | [NVIDIA Jetson](./docs/cn/build_and_install/jetson.md) | [Graphcore IPU](./docs/cn/build_and_install/ipu.md) | [KunlunXin XPU](./docs/cn/build_and_install/kunlunxin.md) |[Huawei Ascend](./docs/en/build_and_install/huawei_ascend.md) | [Serving](./serving) | +|:----------------------:|:--------------------------------------------------------------------------------------------:|:------------------------------------------------:|:----------:|:-------:|:----------:|:-------:|:-------:|:-----------:|:---------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:| +| --- | --- | X86 CPU | NVIDIA GPU | X86 CPU | NVIDIA GPU | X86 CPU | Arm CPU | AArch64 CPU | Phytium D2000CPU | [NVIDIA Jetson](./docs/cn/build_and_install/jetson.md) | [Graphcore IPU](./docs/cn/build_and_install/ipu.md) | [KunlunXin XPU](./docs/cn/build_and_install/kunlunxin.md) |[Huawei Ascend](./docs/en/build_and_install/huawei_ascend.md) | [Serving](./serving) | | Classification | [PaddleClas/ResNet50](./examples/vision/classification/paddleclas) | [✅](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | | Classification | [TorchVison/ResNet](examples/vision/classification/resnet) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | | Classification | [ltralytics/YOLOv5Cls](examples/vision/classification/yolov5cls) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ |❔ | From 
afa3b886f364b1e1e9a7823746da6516ded03bbc Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Thu, 2 Feb 2023 15:28:38 +0800 Subject: [PATCH 21/30] [Bug Fix] fixed labels setting of YOLOv5 (#1213) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复自己训练的yolov5无法指定label个数的错误 --- docs/cn/faq/rknpu2/build.md | 3 +- .../detection/rkyolo/cpp/CMakeLists.txt | 35 ++----------- .../vision/detection/rkyolo/cpp/README_CN.md | 50 +------------------ .../detection/contrib/rknpu2/postprocessor.cc | 1 - .../detection/contrib/rknpu2/postprocessor.h | 31 ++++++------ 5 files changed, 24 insertions(+), 96 deletions(-) diff --git a/docs/cn/faq/rknpu2/build.md b/docs/cn/faq/rknpu2/build.md index c99bcb17f..7389d2396 100644 --- a/docs/cn/faq/rknpu2/build.md +++ b/docs/cn/faq/rknpu2/build.md @@ -47,8 +47,7 @@ cmake .. -DCMAKE_C_COMPILER=/home/zbc/opt/gcc-linaro-6.3.1-2017.05-x86_64_aarch -DENABLE_ORT_BACKEND=OFF \ -DENABLE_RKNPU2_BACKEND=ON \ -DENABLE_VISION=ON \ - -DRKNN2_TARGET_SOC=RK3588 \ - -DENABLE_FLYCV=ON \ + -DRKNN2_TARGET_SOC=RK356X \ -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0 make -j8 make install diff --git a/examples/vision/detection/rkyolo/cpp/CMakeLists.txt b/examples/vision/detection/rkyolo/cpp/CMakeLists.txt index 524b94fea..c29281d84 100644 --- a/examples/vision/detection/rkyolo/cpp/CMakeLists.txt +++ b/examples/vision/detection/rkyolo/cpp/CMakeLists.txt @@ -4,34 +4,9 @@ project(rknpu2_test) set(CMAKE_CXX_STANDARD 14) # 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3") - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake) -include_directories(${FastDeploy_INCLUDE_DIRS}) - +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) add_executable(infer_rkyolo infer_rkyolo.cc) -target_link_libraries(infer_rkyolo ${FastDeploy_LIBS}) - - - -set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install) - -install(TARGETS infer_rkyolo DESTINATION ./) - -install(DIRECTORY model DESTINATION ./) -install(DIRECTORY images DESTINATION ./) - -file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*) -message("${FASTDEPLOY_LIBS}") -install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib) - -file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*) -install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib) - -install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./) - -file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*) -install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib) - -file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/${RKNN2_TARGET_SOC}/lib/*) -install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib) +target_link_libraries(infer_rkyolo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/detection/rkyolo/cpp/README_CN.md b/examples/vision/detection/rkyolo/cpp/README_CN.md index 924e34984..7d2827173 100644 --- a/examples/vision/detection/rkyolo/cpp/README_CN.md +++ b/examples/vision/detection/rkyolo/cpp/README_CN.md @@ -10,58 +10,12 @@ 以上步骤请参考[RK2代NPU部署库编译](../../../../../docs/cn/build_and_install/rknpu2.md)实现 -## 生成基本目录文件 - -该例程由以下几个部分组成 -```text -. 
-├── CMakeLists.txt
-├── build  # 编译文件夹
-├── image  # 存放图片的文件夹
-├── infer_rkyolo.cc
-├── model  # 存放模型文件的文件夹
-└── thirdpartys  # 存放sdk的文件夹
-```
-
-首先需要先生成目录结构
-```bash
-mkdir build
-mkdir images
-mkdir model
-mkdir thirdpartys
-```
-
-## 编译
-
-### 编译并拷贝SDK到thirdpartys文件夹
-
-请参考[RK2代NPU部署库编译](../../../../../docs/cn/build_and_install/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成
-fastdeploy-0.0.3目录,请移动它至thirdpartys目录下.
-
-### 拷贝模型文件,以及配置文件至model文件夹
-在Paddle动态图模型 -> Paddle静态图模型 -> ONNX模型的过程中,将生成ONNX文件以及对应的yaml配置文件,请将配置文件存放到model文件夹内。
-转换为RKNN后的模型文件也需要拷贝至model。
-
-### 准备测试图片至image文件夹
 ```bash
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
-cp 000000014439.jpg ./images
-```
-
-### 编译example
-
-```bash
 cd build
-cmake ..
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j8
-make install
-```
-
-## 运行例程
-
-```bash
-cd ./build/install
-./infer_picodet model/ images/000000014439.jpg
+./infer_rkyolo /path/to/model 000000014439.jpg
 ```
 
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc
index 581cac2e8..7aae742ea 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.cc
@@ -141,7 +141,6 @@ int RKYOLOPostprocessor::ProcessFP16(float* input, int* anchor, int grid_h,
   } else {
     limit_score = box_conf_f32 * class_prob_f32;
   }
-  // printf("limit score: %f", limit_score);
   if (limit_score > conf_threshold_) {
     float box_x, box_y, box_w, box_h;
     if (anchor_per_branch_ == 1) {
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
index b65479415..eea3fe521 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
@@ -55,26 +55,30 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   /// Get nms_threshold, default 0.45
   float GetNMSThreshold() const { return nms_threshold_; }
 
-  // Set height and weight
+  /// Set height and weight
   void SetHeightAndWeight(int& height, int& width) {
     height_ = height;
     width_ = width;
   }
 
-  // Set pad_hw_values
+  /// Set pad_hw_values
   void SetPadHWValues(std::vector<std::vector<int>> pad_hw_values) {
     pad_hw_values_ = pad_hw_values;
   }
 
-  // Set scale
-  void SetScale(std::vector<float> scale) {
-    scale_ = scale;
-  }
+  /// Set scale
+  void SetScale(std::vector<float> scale) { scale_ = scale; }
+
+  /// Set Anchor
+  void SetAnchor(std::vector<int> anchors, int anchor_per_branch) {
+    anchors_ = anchors;
+    anchor_per_branch_ = anchor_per_branch;
+  }
 
-  // Set Anchor
-  void SetAnchor(std::vector<int> anchors, int anchor_per_branch) {
-    anchors_ = anchors;
-    anchor_per_branch_ = anchor_per_branch;
+  /// Set the number of class
+  void SetClassNum(int num) {
+    obj_class_num_ = num;
+    prob_box_size_ = obj_class_num_ + 5;
   }
 
  private:
@@ -85,12 +89,9 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   int width_ = 0;
   int anchor_per_branch_ = 0;
 
-  int ProcessFP16(float *input, int *anchor, int grid_h,
-                  int grid_w, int stride,
-                  std::vector<float> &boxes,
-                  std::vector<float> &boxScores,
-                  std::vector<int> &classId,
-                  float threshold);
+  int ProcessFP16(float* input, int* anchor, int grid_h, int grid_w, int stride,
+                  std::vector<float>& boxes, std::vector<float>& boxScores,
+                  std::vector<int>& classId, float threshold);
   // Model
   int QuickSortIndiceInverse(std::vector<float>& input, int left, int right,
                              std::vector<int>& indices);
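The new `SetClassNum` accessor above is the heart of this fix: a YOLOv5 retrained on a custom label set can now tell the RKYOLO postprocessor its class count instead of being pinned to the COCO default. A minimal usage sketch follows; the model file name, the class count of 3, and the exact `Predict` signature are illustrative assumptions, not taken from this patch.

```cpp
// Hedged sketch: deploying a self-trained, 3-class YOLOv5 exported to RKNN.
// File names below are placeholders; Predict()'s exact signature can differ
// between FastDeploy releases.
#include <iostream>

#include "fastdeploy/vision.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseRKNPU2();  // run on the RK NPU

  auto model = fastdeploy::vision::detection::RKYOLOV5(
      "yolov5_custom.rknn", option, fastdeploy::ModelFormat::RKNN);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }

  // The new API from this patch: declare the retrained label count.
  // Internally prob_box_size_ becomes num + 5
  // (4 box coordinates + 1 objectness score + num class scores).
  model.GetPostprocessor().SetClassNum(3);

  cv::Mat im = cv::imread("000000014439.jpg");
  fastdeploy::vision::DetectionResult res;
  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;
  return 0;
}
```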
From ba6d75f526652fa3bd73443a0a985bc264760882 Mon Sep 17 00:00:00 2001
From: huangjianhui <852142024@qq.com>
Date: Thu, 2 Feb 2023 15:44:52 +0800
Subject: [PATCH 22/30] Delete redundant code (#1222)

Update paddle_backend.cc

Delete redundant code

Co-authored-by: Jason
---
 fastdeploy/runtime/backends/paddle/paddle_backend.cc | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 7b96aeff9..90bd27682 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -92,23 +92,19 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                                    const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
-  // bool PaddleBackend::InitFromPaddle(const std::string& contents) {
   if (initialized_) {
     FDERROR << "PaddleBackend is already initlized, cannot initialize again."
             << std::endl;
     return false;
   }
-
-  // The input/output information get from predictor is not right, use
-  // PaddleReader instead now
-  std::string contents;
-
   config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
                          params_buffer.c_str(), params_buffer.size());
-  contents = model_buffer;
   config_.EnableMemoryOptim();
   BuildOption(option);
-  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
+
+  // The input/output information get from predictor is not right, use
+  // PaddleReader instead now
+  auto reader = paddle2onnx::PaddleReader(model_buffer.c_str(), model_buffer.size());
   // If it's a quantized model, and use cpu with mkldnn, automaticaly switch to
   // int8 mode
   if (reader.is_quantize_model) {

From 50df4b02e9016182a07685a6756f422633dcc95a Mon Sep 17 00:00:00 2001
From: Jason
Date: Thu, 2 Feb 2023 16:15:23 +0800
Subject: [PATCH 23/30] Update build.yml

---
 .github/workflows/build.yml | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 18ee7cce3..141bae96d 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -47,3 +47,38 @@ jobs:
         python setup.py build
         python setup.py bdist_wheel
         ls -l
+  linux-aarch64-py:
+    runs-on: ubuntu-18.04
+    name: Build on ubuntu-18.04 aarch64
+    steps:
+      - uses: actions/checkout@v3
+      - uses: uraimo/run-on-arch-action@v2
+        name: Run commands
+        id: runcmd
+        with:
+          arch: armv7
+          distro: ubuntu18.04
+
+      - name: Clone
+        uses: actions/checkout@v1
+
+      - name: Get CMake
+        uses: lukka/get-cmake@latest
+
+      - name: Get Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Build FastDeploy
+        working-directory: ./python
+        run: |
+          export ENABLE_ORT_BACKEND=ON
+          export ENABLE_PADDLE_BACKEND=OFF
+          export ENABLE_OPENVINO_BACKEND=OFF
+          export ENABLE_VISION=ON
+          export ENABLE_TEXT=ON
+          python -m pip install wheel
+          python setup.py build
+          python setup.py bdist_wheel
+          ls -l

From 7efe2dc1f9736b3ed9d9b256dc92a732b2ad87ec Mon Sep 17 00:00:00 2001
From: Jason
Date: Thu, 2 Feb 2023 16:15:47 +0800
Subject: [PATCH 24/30] Update build.yml

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 141bae96d..92873dd18 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -56,7 +56,7 @@ jobs:
         name: Run commands
         id: runcmd
         with:
-          arch: armv7
+          arch: aarch64
           distro: ubuntu18.04
 
       - name: Clone

From 947df2f51207b8eed033dd5a29f31359a4f50563 Mon Sep 17 00:00:00 2001
From: Jason Date: Thu, 2 Feb 2023 16:22:40 +0800 Subject: [PATCH 25/30] Update build.yml --- .github/workflows/build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 92873dd18..5a83c0702 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -58,6 +58,9 @@ jobs: with: arch: aarch64 distro: ubuntu18.04 + run: | + uname -a + echo ::set-output name=uname::$(uname -a) - name: Clone uses: actions/checkout@v1 From f6b1b6eb4cf8bcca5a9ba5cef62d673299eab8c3 Mon Sep 17 00:00:00 2001 From: leiqing <54695910+leiqing1@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:29:27 +0800 Subject: [PATCH 26/30] Update README_CN.md --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index 3057359d6..88d0fea59 100755 --- a/README_CN.md +++ b/README_CN.md @@ -289,7 +289,7 @@ int main(int argc, char* argv[]) { | Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | | Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | | Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Detection | 🔥🔥[PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | | Detection | [🔥PaddleDetection/YOLOv8](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | | Detection | [🔥ultralytics/YOLOv8](./examples/vision/detection/yolov8) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | | Detection | [PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | From 912ed8a497ec42cc4f4cde6af8285f9404f6650c Mon Sep 17 00:00:00 2001 From: leiqing <54695910+leiqing1@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:29:50 +0800 Subject: [PATCH 27/30] Update README_EN.md --- README_EN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_EN.md b/README_EN.md index 64b349fbd..8390d948e 100644 --- a/README_EN.md +++ b/README_EN.md @@ -275,7 +275,7 @@ Notes: ✅: already supported; ❔: to be supported in the future; N/A: Not Ava | Classification | [PaddleClas/SqueeezeNetV1.1](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | | Classification | [PaddleClas/Inceptionv3](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | | Classification | [PaddleClas/PP-HGNet](./examples/vision/classification/paddleclas) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |✅ | ✅ | -| Detection | [PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | +| Detection | 🔥🔥[PaddleDetection/PP-YOLOE+](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ✅ | | Detection | [🔥PaddleDetection/YOLOv8](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ |✅ | ❔ | | Detection | [🔥ultralytics/YOLOv8](./examples/vision/detection/yolov8) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ❔ |❔ | ❔ | | Detection | 
[PaddleDetection/PicoDet](./examples/vision/detection/paddledetection) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | ❔ | ✅ | From 56e4e56abc3d8a11e3a8e1d8043d88c2d5bd7b83 Mon Sep 17 00:00:00 2001 From: Jason Date: Thu, 2 Feb 2023 16:37:52 +0800 Subject: [PATCH 28/30] Update build.yml --- .github/workflows/build.yml | 58 +------------------------------------ 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5a83c0702..d1569f65a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,25 +1,7 @@ name: Build -on: [push, pull_request] +on: [pull_request] jobs: - macOS-latest-cpp: - runs-on: macOS-latest - - steps: - - name: Clone - uses: actions/checkout@v1 - - - name: Get CMake - uses: lukka/get-cmake@latest - - - name: Build FastDeploy - run: | - mkdir build - cd build - cmake .. -DENABLE_ORT_BACKEND=ON -DENABLE_PADDLE_BACKEND=OFF -DENABLE_OPENVINO_BACKEND=OFF -DCMAKE_INSTALL_PREFIX=${PWD}/compiled_fastdeploy_sdk -DENABLE_VISION=ON -DENABLE_TEXT=ON - make -j12 - make install - ls -l macOS-latest-py: runs-on: macos-latest @@ -47,41 +29,3 @@ jobs: python setup.py build python setup.py bdist_wheel ls -l - linux-aarch64-py: - runs-on: ubuntu-18.04 - name: Build on ubuntu-18.04 aarch64 - steps: - - uses: actions/checkout@v3 - - uses: uraimo/run-on-arch-action@v2 - name: Run commands - id: runcmd - with: - arch: aarch64 - distro: ubuntu18.04 - run: | - uname -a - echo ::set-output name=uname::$(uname -a) - - - name: Clone - uses: actions/checkout@v1 - - - name: Get CMake - uses: lukka/get-cmake@latest - - - name: Get Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Build FastDeploy - working-directory: ./python - run: | - export ENABLE_ORT_BACKEND=ON - export ENABLE_PADDLE_BACKEND=OFF - export ENABLE_OPENVINO_BACKEND=OFF - export ENABLE_VISION=ON - export ENABLE_TEXT=ON - python -m pip install wheel - python setup.py build - python setup.py bdist_wheel - ls -l From 3c5824c862b7dbdd61f93aa7bc295074135e1936 Mon Sep 17 00:00:00 2001 From: Jason Date: Thu, 2 Feb 2023 17:40:03 +0800 Subject: [PATCH 29/30] [Other] Refine Paddle Lite backend (#1202) * Refine Paddle Lite backend * fix error * Fix compile error * Fix build error * modify cpu configure * fix error * Fix comment * Fix error --- .../backends/lite/configure_hardware.cc | 159 ++++++++++++ .../runtime/backends/lite/lite_backend.cc | 233 +++++------------- .../runtime/backends/lite/lite_backend.h | 26 +- fastdeploy/runtime/backends/lite/option.h | 30 +-- fastdeploy/runtime/runtime_option.cc | 8 +- 5 files changed, 258 insertions(+), 198 deletions(-) create mode 100644 fastdeploy/runtime/backends/lite/configure_hardware.cc diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc new file mode 100644 index 000000000..7c7a9993c --- /dev/null +++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc @@ -0,0 +1,159 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime/backends/lite/lite_backend.h"
+// https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
+// When compiling the FastDeploy dynamic library, namely,
+// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
+// static library, you need to include the fake registration
+// codes of Paddle Lite. When you compile the FastDeploy static
+// library and depends on the Paddle Lite static library,
+// WITH_STATIC_LIB=ON, you do not need to include the fake
+// registration codes for Paddle Lite, but wait until you
+// use the FastDeploy static library.
+#if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB)))
+#warning You are compiling the FastDeploy dynamic library with \
+Paddle Lite static lib We will automatically add some registration \
+codes for ops, kernels and passes for Paddle Lite.
+#include "paddle_use_kernels.h"  // NOLINT
+#include "paddle_use_ops.h"      // NOLINT
+#include "paddle_use_passes.h"   // NOLINT
+#endif
+
+#include
+
+namespace fastdeploy {
+
+#if defined(__arm__) || defined(__aarch64__)
+#define FD_LITE_HOST TARGET(kARM)
+#elif defined(__x86_64__)
+#define FD_LITE_HOST TARGET(kX86)
+#endif
+
+std::vector<paddle::lite_api::Place> GetPlacesForCpu(
+    const LiteBackendOption& option) {
+  std::vector<paddle::lite_api::Place> valid_places;
+  valid_places.push_back(
+      paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});
+  if (option.enable_fp16) {
+    paddle::lite_api::MobileConfig check_fp16_config;
+    if (check_fp16_config.check_fp16_valid()) {
+      valid_places.push_back(
+          paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});
+    } else {
+      FDWARNING << "Current CPU doesn't support float16 precision, will "
+                   "fallback to float32."
+                << std::endl;
+    }
+  }
+  valid_places.push_back(
+      paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)});
+  return valid_places;
+}
+
+void LiteBackend::ConfigureCpu(const LiteBackendOption& option) {
+  config_.set_valid_places(GetPlacesForCpu(option));
+}
+
+void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
+  std::vector<paddle::lite_api::Place> valid_places;
+  valid_places.push_back(
+      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
+  if (option.enable_fp16) {
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
+  }
+  valid_places.push_back(
+      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
+
+  config_.set_xpu_dev_per_thread(option.device_id);
+  config_.set_xpu_workspace_l3_size_per_thread(
+      option.kunlunxin_l3_workspace_size);
+  config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size,
+                                  option.kunlunxin_locked);
+  config_.set_xpu_conv_autotune(option.kunlunxin_autotune,
+                                option.kunlunxin_autotune_file);
+  config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
+                                       option.kunlunxin_adaptive_seqlen);
+  if (option.kunlunxin_enable_multi_stream) {
+    config_.enable_xpu_multi_stream();
+  }
+  auto cpu_places = GetPlacesForCpu(option);
+  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
+  config_.set_valid_places(valid_places);
+}
+
+void LiteBackend::ConfigureTimvx(const LiteBackendOption& option) {
+  config_.set_nnadapter_device_names({"verisilicon_timvx"});
+  std::vector<paddle::lite_api::Place> valid_places;
+  valid_places.push_back(
+      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
+  valid_places.push_back(
+      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
+  auto cpu_places = GetPlacesForCpu(option);
+  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
+  config_.set_valid_places(valid_places);
+  ConfigureNNAdapter(option);
+}
+
+void LiteBackend::ConfigureAscend(const LiteBackendOption& option) {
+  config_.set_nnadapter_device_names({"huawei_ascend_npu"});
+  std::vector<paddle::lite_api::Place> valid_places;
+  valid_places.push_back(
+      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
+  valid_places.push_back(
+      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
+  auto cpu_places = GetPlacesForCpu(option);
+  valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
+  config_.set_valid_places(valid_places);
+  ConfigureNNAdapter(option);
+}
+
+void LiteBackend::ConfigureNNAdapter(const LiteBackendOption& option) {
+  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
+    std::vector<char> nnadapter_subgraph_partition_config_buffer;
+    if (ReadFile(option.nnadapter_subgraph_partition_config_path,
+                 &nnadapter_subgraph_partition_config_buffer, false)) {
+      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
+        std::string nnadapter_subgraph_partition_config_string(
+            nnadapter_subgraph_partition_config_buffer.data(),
+            nnadapter_subgraph_partition_config_buffer.size());
+        config_.set_nnadapter_subgraph_partition_config_buffer(
+            nnadapter_subgraph_partition_config_string);
+      }
+    }
+  }
+
+  if (!option.nnadapter_context_properties.empty()) {
+    config_.set_nnadapter_context_properties(
+        option.nnadapter_context_properties);
+  }
+
+  if (!option.nnadapter_model_cache_dir.empty()) {
+    config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir);
+  }
+
+  if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) {
+    config_.set_nnadapter_mixed_precision_quantization_config_path(
+        option.nnadapter_mixed_precision_quantization_config_path);
+  }
+
+  if (!option.nnadapter_subgraph_partition_config_path.empty()) {
+    config_.set_nnadapter_subgraph_partition_config_path(
+        option.nnadapter_subgraph_partition_config_path);
+  }
+
+  config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info);
+}
+}  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.cc b/fastdeploy/runtime/backends/lite/lite_backend.cc
index f20c23c57..f9d47a7a5 100644
--- a/fastdeploy/runtime/backends/lite/lite_backend.cc
+++ b/fastdeploy/runtime/backends/lite/lite_backend.cc
@@ -14,164 +14,41 @@
 
 #include "fastdeploy/runtime/backends/lite/lite_backend.h"
 // https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
-// When compiling the FastDeploy dynamic library, namely, 
-// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite 
+// When compiling the FastDeploy dynamic library, namely,
+// WITH_STATIC_LIB=OFF, and depending on the Paddle Lite
 // static library, you need to include the fake registration
-// codes of Paddle Lite. When you compile the FastDeploy static 
-// library and depends on the Paddle Lite static library, 
-// WITH_STATIC_LIB=ON, you do not need to include the fake 
-// registration codes for Paddle Lite, but wait until you 
+// codes of Paddle Lite. When you compile the FastDeploy static
+// library and depends on the Paddle Lite static library,
+// WITH_STATIC_LIB=ON, you do not need to include the fake
+// registration codes for Paddle Lite, but wait until you
 // use the FastDeploy static library.
 #if (defined(WITH_LITE_STATIC) && (!defined(WITH_STATIC_LIB)))
 #warning You are compiling the FastDeploy dynamic library with \
 Paddle Lite static lib We will automatically add some registration \
 codes for ops, kernels and passes for Paddle Lite.
-#include "paddle_use_ops.h"      // NOLINT
-#include "paddle_use_kernels.h"  // NOLINT
-#include "paddle_use_passes.h"   // NOLINT
+#include "paddle_use_kernels.h"  // NOLINT
+#include "paddle_use_ops.h"      // NOLINT
+#include "paddle_use_passes.h"   // NOLINT
 #endif
 
 #include
 
 namespace fastdeploy {
 
-// Convert data type from paddle lite to fastdeploy
-FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) {
-  if (dtype == paddle::lite_api::PrecisionType::kFloat) {
-    return FDDataType::FP32;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt8) {
-    return FDDataType::INT8;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt32) {
-    return FDDataType::INT32;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt64) {
-    return FDDataType::INT64;
-  } else if (dtype == paddle::lite_api::PrecisionType::kInt16) {
-    return FDDataType::INT16;
-  } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) {
-    return FDDataType::UINT8;
-  } else if (dtype == paddle::lite_api::PrecisionType::kFP64) {
-    return FDDataType::FP64;
-  }
-  FDASSERT(false, "Unexpected data type of %d.", dtype);
-  return FDDataType::FP32;
-}
-
 void LiteBackend::BuildOption(const LiteBackendOption& option) {
   option_ = option;
-  std::vector<paddle::lite_api::Place> valid_places;
-  if (option_.enable_int8) {
-    if (option_.enable_kunlunxin) {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
-    } else {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
-    }
-    FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
-           << "inference with int8 precision!" << std::endl;
+
+  if (option_.device == Device::CPU) {
+    ConfigureCpu(option_);
+  } else if (option_.device == Device::TIMVX) {
+    ConfigureTimvx(option_);
+  } else if (option_.device == Device::KUNLUNXIN) {
+    ConfigureKunlunXin(option_);
+  } else if (option_.device == Device::ASCEND) {
+    ConfigureAscend(option_);
   }
-  if (option_.enable_fp16) {
-    if (option_.enable_kunlunxin) {
-      valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
-    } else {
-      paddle::lite_api::MobileConfig check_fp16_config;
-      // Determine whether the device supports the FP16
-      // instruction set (or whether it is an arm device
-      // of the armv8.2 architecture)
-      supported_fp16_ = check_fp16_config.check_fp16_valid();
-      if (supported_fp16_) {
-        valid_places.push_back(
-            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-        FDINFO << "The device supports FP16, Lite::Backend will inference with "
-                  "FP16 precision."
-               << std::endl;
-      } else {
-        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
-      }
-    }
-  }
-  if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
-    std::vector<char> nnadapter_subgraph_partition_config_buffer;
-    if (ReadFile(option_.nnadapter_subgraph_partition_config_path,
-                 &nnadapter_subgraph_partition_config_buffer, false)) {
-      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
-        std::string nnadapter_subgraph_partition_config_string(
-            nnadapter_subgraph_partition_config_buffer.data(),
-            nnadapter_subgraph_partition_config_buffer.size());
-        config_.set_nnadapter_subgraph_partition_config_buffer(
-            nnadapter_subgraph_partition_config_string);
-      }
-    }
-  }
-  if (option_.enable_timvx) {
-    config_.set_nnadapter_device_names({"verisilicon_timvx"});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
-  }
-
-  if (option_.enable_ascend) {
-    if (option_.nnadapter_device_names.empty()) {
-      config_.set_nnadapter_device_names({"huawei_ascend_npu"});
-    } else {
-      config_.set_nnadapter_device_names(option_.nnadapter_device_names);
-    }
-
-    if (!option_.nnadapter_context_properties.empty()) {
-      config_.set_nnadapter_context_properties(
-          option_.nnadapter_context_properties);
-    }
-
-    if (!option_.nnadapter_model_cache_dir.empty()) {
-      config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
-    }
-
-    if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
-      config_.set_nnadapter_mixed_precision_quantization_config_path(
-          option_.nnadapter_mixed_precision_quantization_config_path);
-    }
-
-    if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
-      config_.set_nnadapter_subgraph_partition_config_path(
-          option_.nnadapter_subgraph_partition_config_path);
-    }
-
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
-  }
-
-  if (option_.enable_kunlunxin) {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
-    config_.set_xpu_dev_per_thread(option_.device_id);
-    config_.set_xpu_workspace_l3_size_per_thread(
-        option_.kunlunxin_l3_workspace_size);
-    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
-                                    option_.kunlunxin_locked);
-    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
-                                  option_.kunlunxin_autotune_file);
-    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
-                                         option_.kunlunxin_adaptive_seqlen);
-    if (option_.kunlunxin_enable_multi_stream) {
-      config_.enable_xpu_multi_stream();
-    }
-  } else {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
-  }
-  config_.set_valid_places(valid_places);
-  if (option_.threads > 0) {
-    config_.set_threads(option_.threads);
+  if (option_.cpu_threads > 0) {
+    config_.set_threads(option_.cpu_threads);
   }
   if (option_.power_mode > 0) {
     config_.set_power_mode(
@@ -179,29 +56,6 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
 }
 
-bool LiteBackend::ReadFile(const std::string& filename,
-                           std::vector<char>* contents, const bool binary) {
-  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
-  if (!fp) {
-    FDERROR << "Cannot open file " << filename << "." << std::endl;
-    return false;
-  }
-  fseek(fp, 0, SEEK_END);
-  size_t size = ftell(fp);
-  fseek(fp, 0, SEEK_SET);
-  contents->clear();
-  contents->resize(size);
-  size_t offset = 0;
-  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
-  while (offset < size) {
-    size_t already_read = fread(ptr, 1, size - offset, fp);
-    offset += already_read;
-    ptr += already_read;
-  }
-  fclose(fp);
-  return true;
-}
-
 bool LiteBackend::InitFromPaddle(const std::string& model_file,
                                  const std::string& params_file,
                                  const LiteBackendOption& option) {
@@ -246,7 +100,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
       auto shape = tensor->shape();
       info.shape.assign(shape.begin(), shape.end());
       info.name = output_names[i];
-      if (!option_.enable_kunlunxin) {
+      if (option_.device != Device::KUNLUNXIN) {
         info.dtype = LiteDataTypeToFD(tensor->precision());
       }
       outputs_desc_.emplace_back(info);
@@ -337,4 +191,49 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
+bool ReadFile(const std::string& filename, std::vector<char>* contents,
+              bool binary) {
+  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp) {
+    FDERROR << "Cannot open file " << filename << "." << std::endl;
+    return false;
+  }
+  fseek(fp, 0, SEEK_END);
+  size_t size = ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+  contents->clear();
+  contents->resize(size);
+  size_t offset = 0;
+  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
+  while (offset < size) {
+    size_t already_read = fread(ptr, 1, size - offset, fp);
+    offset += already_read;
+    ptr += already_read;
+  }
+  fclose(fp);
+  return true;
+}
+
+// Convert data type from paddle lite to fastdeploy
+FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype) {
+  if (dtype == paddle::lite_api::PrecisionType::kFloat) {
+    return FDDataType::FP32;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt8) {
+    return FDDataType::INT8;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt32) {
+    return FDDataType::INT32;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt64) {
+    return FDDataType::INT64;
+  } else if (dtype == paddle::lite_api::PrecisionType::kInt16) {
+    return FDDataType::INT16;
+  } else if (dtype == paddle::lite_api::PrecisionType::kUInt8) {
+    return FDDataType::UINT8;
+  } else if (dtype == paddle::lite_api::PrecisionType::kFP64) {
+    return FDDataType::FP64;
+  }
+  FDASSERT(false, "Unexpected data type of %s.",
+           paddle::lite_api::PrecisionToStr(dtype).c_str());
+  return FDDataType::FP32;
+}
+
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/lite/lite_backend.h b/fastdeploy/runtime/backends/lite/lite_backend.h
index 8a7b2651c..bb01551a0 100755
--- a/fastdeploy/runtime/backends/lite/lite_backend.h
+++ b/fastdeploy/runtime/backends/lite/lite_backend.h
@@ -19,13 +19,12 @@
 #include
 #include
 
-#include "fastdeploy/runtime/backends/backend.h"
-#include "fastdeploy/runtime/backends/lite/option.h"
 #include "paddle_api.h"  // NOLINT
 
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
+
 namespace fastdeploy {
-// Convert data type from paddle lite to fastdeploy
-FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
 
 class LiteBackend : public BaseBackend {
  public:
@@ -51,15 +50,26 @@ class LiteBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void ConfigureCpu(const LiteBackendOption& option);
+  void ConfigureTimvx(const LiteBackendOption& option);
+  void ConfigureAscend(const LiteBackendOption& option);
+  void ConfigureKunlunXin(const LiteBackendOption& option);
+  void ConfigureNNAdapter(const LiteBackendOption& option);
+
   paddle::lite_api::CxxConfig config_;
   std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor_;
   std::vector<TensorInfo> inputs_desc_;
   std::vector<TensorInfo> outputs_desc_;
   std::map<std::string, int> inputs_order_;
   LiteBackendOption option_;
-  bool supported_fp16_ = false;
-  bool ReadFile(const std::string& filename,
-                std::vector<char>* contents,
-                const bool binary = true);
 };
+
+// Convert data type from paddle lite to fastdeploy
+FDDataType LiteDataTypeToFD(const paddle::lite_api::PrecisionType& dtype);
+
+// Helper function to read file
+bool ReadFile(const std::string& filename,
+              std::vector<char>* contents,
+              bool binary = true);
+
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index fc1dfa919..879cb3472 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -17,7 +17,7 @@
 #include "fastdeploy/core/fd_type.h"
 // https://github.com/PaddlePaddle/Paddle-Lite/issues/8290
 #if (defined(WITH_LITE_STATIC) && defined(WITH_STATIC_LIB))
-// Whether to output some warning messages when using the 
+// Whether to output some warning messages when using the
 // FastDepoy static library, default OFF. These messages
 // are only reserve for debugging.
 #if defined(WITH_STATIC_WARNING)
@@ -36,7 +36,7 @@ ops, kernels and passes for Paddle Lite.
 #include
 #include
 #include
-#include 
+#include
 
 namespace fastdeploy {
 
@@ -51,24 +51,19 @@ enum LitePowerMode {
 };
 
 struct LiteBackendOption {
-  // cpu num threads
-  int threads = 1;
-  // lite power mode
-  // 0: LITE_POWER_HIGH
-  // 1: LITE_POWER_LOW
-  // 2: LITE_POWER_FULL
-  // 3: LITE_POWER_NO_BIND
-  // 4: LITE_POWER_RAND_HIGH
-  // 5: LITE_POWER_RAND_LOW
-  int power_mode = 3;
-  // enable fp16
+  /// Paddle Lite power mode for mobile device.
+  LitePowerMode power_mode = LITE_POWER_NO_BIND;
+  /// Number of threads while use CPU
+  int cpu_threads = 1;
+  /// Enable use half precision
   bool enable_fp16 = false;
-  // enable int8
+  /// Enable use int8 precision for quantized model
   bool enable_int8 = false;
+
+  Device device = Device::CPU;
+
   // optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
-  // TODO(qiuyanjun): support more options for lite backend.
-  // Such as fp16, different device target (kARM/kXPU/kNPU/...)
   std::string nnadapter_subgraph_partition_config_path = "";
   std::string nnadapter_subgraph_partition_config_buffer = "";
   std::string nnadapter_context_properties = "";
   std::string nnadapter_model_cache_dir = "";
   std::string nnadapter_mixed_precision_quantization_config_path = "";
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
   std::vector<std::string> nnadapter_device_names = {};
-  bool enable_timvx = false;
-  bool enable_ascend = false;
-  bool enable_kunlunxin = false;
   int device_id = 0;
   int kunlunxin_l3_workspace_size = 0xfffc00;
   bool kunlunxin_locked = false;
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index 0798786b9..7dee5365a 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -58,7 +58,7 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
 
 void RuntimeOption::UseTimVX() {
   device = Device::TIMVX;
-  paddle_lite_option.enable_timvx = true;
+  paddle_lite_option.device = device;
 }
 
 void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
@@ -68,7 +68,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                  bool adaptive_seqlen,
                                  bool enable_multi_stream) {
   device = Device::KUNLUNXIN;
-  paddle_lite_option.enable_kunlunxin = true;
+  paddle_lite_option.device = device;
   paddle_lite_option.device_id = kunlunxin_id;
   paddle_lite_option.kunlunxin_l3_workspace_size = l3_workspace_size;
   paddle_lite_option.kunlunxin_locked = locked;
@@ -81,7 +81,7 @@ void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
 
 void RuntimeOption::UseAscend() {
   device = Device::ASCEND;
-  paddle_lite_option.enable_ascend = true;
+  paddle_lite_option.device = device;
 }
 
 void RuntimeOption::UseSophgo() {
@@ -96,7 +96,7 @@ void RuntimeOption::SetExternalStream(void* external_stream) {
 void RuntimeOption::SetCpuThreadNum(int thread_num) {
   FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
   cpu_thread_num = thread_num;
-  paddle_lite_option.threads = thread_num;
+  paddle_lite_option.cpu_threads = thread_num;
   ort_option.intra_op_num_threads = thread_num;
 }
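With this refactor, device choice reaches the Lite backend through the single `LiteBackendOption::device` field rather than per-device booleans. A minimal sketch of the resulting call path from user code follows; the model paths are placeholders and the snippet is an illustration of the dispatch above, not code from the patch.

```cpp
// Hedged sketch: how RuntimeOption now feeds the refactored Lite backend.
#include <iostream>

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseTimVX();          // sets paddle_lite_option.device = Device::TIMVX
  option.SetCpuThreadNum(4);  // sets paddle_lite_option.cpu_threads = 4

  // Placeholder model files. When the backend initializes, BuildOption()
  // dispatches on the device field: CPU -> ConfigureCpu, TIMVX ->
  // ConfigureTimvx, KUNLUNXIN -> ConfigureKunlunXin, ASCEND -> ConfigureAscend.
  option.SetModelPath("model.pdmodel", "model.pdiparams");

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    std::cerr << "Failed to initialize the runtime." << std::endl;
    return -1;
  }
  return 0;
}
```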
From ea9a74cee3d8cf2b500b4b423de57c11b67e8371 Mon Sep 17 00:00:00 2001
From: yunyaoXYY
Date: Thu, 2 Feb 2023 12:05:07 +0000
Subject: [PATCH 30/30] Fix Paddle Lite Ascend Lib

---
 cmake/ascend.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/ascend.cmake b/cmake/ascend.cmake
index 3e22aa92c..1f4ff80b9 100644
--- a/cmake/ascend.cmake
+++ b/cmake/ascend.cmake
@@ -26,7 +26,7 @@ if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
   else ()
     message(STATUS "Build FastDeploy Ascend Python library on aarch64 platform.")
     if(NOT PADDLELITE_URL)
-      set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0118.tgz")
+      set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux_arm64_huawei_ascend_npu_python_0202.tgz")
     endif()
   endif()
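The series closes with the Ascend library bump above. To tie the pieces together, here is a hedged end-to-end sketch of the Huawei Ascend deployment this series enables; the model, config, and image paths are placeholders, and the exact `Predict` signature may vary between FastDeploy releases.

```cpp
// Hedged sketch: image classification on Huawei Ascend, backed by the
// Paddle Lite archive updated in the patch above.
#include <iostream>

#include "fastdeploy/vision.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseAscend();  // routes execution through the Lite Ascend runtime

  auto model = fastdeploy::vision::classification::PaddleClasModel(
      "ResNet50_vd_infer/inference.pdmodel",
      "ResNet50_vd_infer/inference.pdiparams",
      "ResNet50_vd_infer/inference_cls.yaml", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }

  cv::Mat im = cv::imread("test.jpg");
  fastdeploy::vision::ClassifyResult res;
  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;
  return 0;
}
```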