From 8c3ccc2cc20b6caf07b214fd89a78fbbfbf7f6ff Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Mon, 27 Feb 2023 15:01:17 +0800 Subject: [PATCH] [Hackathon 182 Model] Update PPOCRV3 For RKNPU2 (#1403) * update ppocrv3 for rknpu2 * add config * add config * detele unuseful * update useful results * Repair note * Repair note * fixed bugs * update --- FastDeploy.cmake.in | 4 +- cmake/rknpu2.cmake | 11 +- examples/vision/ocr/PP-OCRv3/rknpu2/README.md | 77 ++++++++++ .../ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt | 14 ++ .../vision/ocr/PP-OCRv3/rknpu2/cpp/README.md | 55 +++++++ .../ocr/PP-OCRv3/rknpu2/cpp/README_CN.md | 63 ++++++++ .../PP-OCRv3/rknpu2/cpp/infer_static_shape.cc | 126 +++++++++++++++ .../ocr/PP-OCRv3/rknpu2/python/README.md | 49 ++++++ .../ocr/PP-OCRv3/rknpu2/python/README_CN.md | 62 ++++++++ .../rknpu2/python/infer_static_shape.py | 144 ++++++++++++++++++ fastdeploy/vision/ocr/ppocr/classifier.cc | 35 +++-- .../vision/ocr/ppocr/cls_preprocessor.cc | 30 ++-- .../vision/ocr/ppocr/cls_preprocessor.h | 9 ++ fastdeploy/vision/ocr/ppocr/dbdetector.cc | 1 + .../vision/ocr/ppocr/det_preprocessor.cc | 11 +- .../vision/ocr/ppocr/det_preprocessor.h | 30 ++++ .../vision/ocr/ppocr/ocrmodel_pybind.cc | 115 ++++++++------ .../vision/ocr/ppocr/rec_preprocessor.cc | 50 ++++-- .../vision/ocr/ppocr/rec_preprocessor.h | 9 ++ fastdeploy/vision/ocr/ppocr/recognizer.cc | 41 +++-- fastdeploy/vision/visualize/ocr.cc | 12 +- fastdeploy/vision/visualize/visualize.h | 45 +++--- .../fastdeploy/vision/ocr/ppocr/__init__.py | 47 ++++++ tools/rknpu2/config/ppocrv3_cls.yaml | 15 ++ tools/rknpu2/config/ppocrv3_det.yaml | 15 ++ tools/rknpu2/config/ppocrv3_rec.yaml | 15 ++ tools/rknpu2/export.py | 5 +- 27 files changed, 958 insertions(+), 132 deletions(-) create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/classifier.cc mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/recognizer.cc create mode 100644 tools/rknpu2/config/ppocrv3_cls.yaml create mode 100644 tools/rknpu2/config/ppocrv3_det.yaml create mode 100644 tools/rknpu2/config/ppocrv3_rec.yaml diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 5c2c5b733..f5c2dbe83 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -131,9 +131,9 @@ endif() if(ENABLE_RKNPU2_BACKEND) if(RKNN2_TARGET_SOC STREQUAL "RK356X") - set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so) + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) elseif (RKNN2_TARGET_SOC STREQUAL "RK3588") - set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so) + set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so) else () message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588") 
endif() diff --git a/cmake/rknpu2.cmake b/cmake/rknpu2.cmake index 7f11b0bfb..3d93a364f 100644 --- a/cmake/rknpu2.cmake +++ b/cmake/rknpu2.cmake @@ -1,7 +1,7 @@ # get RKNPU2_URL set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") -set(RKNPU2_VERSION "1.4.0") -set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz") +set(RKNPU2_VERSION "1.4.2b0") +set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz") set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}") # download_and_decompress @@ -10,11 +10,12 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} # set path set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime) +# include lib if (EXISTS ${RKNPU_RUNTIME_PATH}) - set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so) - include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include) + set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so) + include_directories(${RKNPU_RUNTIME_PATH}/include) else () - message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error") + message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.") endif () diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/README.md new file mode 100644 index 000000000..06ba9fc6e --- /dev/null +++ b/examples/vision/ocr/PP-OCRv3/rknpu2/README.md @@ -0,0 +1,77 @@ +# PaddleOCR 模型部署 + +## PaddleOCR为多个模型组合串联任务,包含如下几个模型构成 + +* 文本检测 `DBDetector` +* [可选]方向分类 `Classifer` 用于调整进入文字识别前的图像方向 +* 文字识别 `Recognizer` 用于从图像中识别出文字 + +根据不同场景, FastDeploy汇总提供如下OCR任务部署, 用户需同时下载3个模型与字典文件(或2个,分类器可选), 完成OCR整个预测流程 + +## PP-OCR 中英文系列模型 + +下表中的模型下载链接由PaddleOCR模型库提供, 详见[PP-OCR系列模型列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md) + +| OCR版本 | 文本框检测 | 方向分类模型 | 文字识别 | 字典文件 | 说明 | +|:-------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:--------------------------------------------------------| +| ch_PP-OCRv3[推荐] | [ch_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv3系列原始超轻量模型,支持中英文、多语种文本检测 | +| en_PP-OCRv3[推荐] | [en_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [en_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) | [en_dict.txt](https://bj.bcebos.com/paddlehub/fastdeploy/en_dict.txt) | OCRv3系列原始超轻量模型,支持英文与数字识别,除检测模型和识别模型的训练数据与中文模型不同以外,无其他区别 | +| ch_PP-OCRv2 | [ch_PP-OCRv2_det](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | 
[ch_PP-OCRv2_rec](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型,支持中英文、多语种文本检测 |
+| ch_PP-OCRv2_mobile | [ch_ppocr_mobile_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_mobile_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型,支持中英文、多语种文本检测,比PP-OCRv2更加轻量 |
+| ch_PP-OCRv2_server | [ch_ppocr_server_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_server_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2服务器系列模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好 |
+
+## 模型转换
+
+在RKNPU2上使用PPOCR时,我们需要把Paddle静态图模型转为RKNN模型。
+
+### 静态图模型转RKNN格式模型
+
+rknn_toolkit2工具暂不支持直接从Paddle静态图模型转换为RKNN模型,因此我们需要先将Paddle静态图模型转为ONNX模型,再将ONNX模型转换为RKNN模型。
+
+```bash
+# 下载模型
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+tar -xvf ch_PP-OCRv3_det_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+tar -xvf ch_PP-OCRv3_rec_infer.tar
+
+# 转换模型到ONNX格式
+paddle2onnx --model_dir ch_PP-OCRv3_det_infer \
+            --model_filename inference.pdmodel \
+            --params_filename inference.pdiparams \
+            --save_file ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+            --enable_dev_version True
+paddle2onnx --model_dir ch_ppocr_mobile_v2.0_cls_infer \
+            --model_filename inference.pdmodel \
+            --params_filename inference.pdiparams \
+            --save_file ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+            --enable_dev_version True
+paddle2onnx --model_dir ch_PP-OCRv3_rec_infer \
+            --model_filename inference.pdmodel \
+            --params_filename inference.pdiparams \
+            --save_file ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+            --enable_dev_version True
+
+# 固定模型的输入shape
+python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+                               --output_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+                               --input_shape_dict "{'x':[1,3,960,960]}"
+python -m paddle2onnx.optimize --input_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+                               --output_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+                               --input_shape_dict "{'x':[1,3,48,192]}"
+python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+                               --output_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+                               --input_shape_dict "{'x':[1,3,48,320]}"
+
+# 转换ONNX模型到RKNN模型
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_det.yaml \
+                              --target_platform rk3588
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_rec.yaml \
+                              --target_platform rk3588
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_cls.yaml \
+                              --target_platform rk3588
+```
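+
+上述`tools/rknpu2/export.py`脚本内部通过rknn-toolkit2完成ONNX到RKNN的转换。下面给出一个简化的示意脚本(假设已安装rknn-toolkit2,参数取自上文的`ppocrv3_det.yaml`),仅用于说明转换的核心步骤,实际转换请以`export.py`为准:
+
+```python
+# 示意脚本:假设已安装 rknn-toolkit2,参数与 tools/rknpu2/config/ppocrv3_det.yaml 一致
+from rknn.api import RKNN
+
+rknn = RKNN(verbose=True)
+# mean/std 会被固化进 RKNN 模型,因此部署时 FastDeploy 侧需要调用
+# DisableNormalize()/DisablePermute() 跳过重复的预处理(见下文示例代码)
+rknn.config(mean_values=[[123.675, 116.28, 103.53]],
+            std_values=[[58.395, 57.12, 57.375]],
+            target_platform="rk3588")
+rknn.load_onnx(model="./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx")
+rknn.build(do_quantization=False)  # 对应 yaml 中的 do_quantization: False
+rknn.export_rknn(
+    "./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn")
+rknn.release()
+```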
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
new file mode 100644
index 000000000..9538fea6b
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+
+# 指定下载解压后的fastdeploy库路径
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# 添加FastDeploy依赖头文件
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc)
+# 添加FastDeploy库依赖
+target_link_libraries(infer_static_shape_demo ${FASTDEPLOY_LIBS})
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
new file mode 100755
index 000000000..af5be5360
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
@@ -0,0 +1,55 @@
+English | [简体中文](README_CN.md)
+# PPOCRv3 C++ Deployment Example
+
+This directory provides an example in which `infer_static_shape.cc` fast finishes the deployment of PPOCRv3 on CPU/RKNPU2.
+
+Two steps before deployment
+
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+
+Taking CPU inference on Linux as an example, the compilation test can be completed by executing the following commands in this directory. FastDeploy version 1.0.3 or above (x.x.x>=1.0.3) is required to support this model.
+
+```
+mkdir build
+cd build
+# Download the FastDeploy precompiled library. Users can choose the appropriate version in the `FastDeploy Precompiled Library` mentioned above
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# Download the test image and dictionary file
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# Copy the ONNX and RKNN models converted in ../README.md into the build directory
+
+# CPU inference
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+                          ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+                          ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+                          ./ppocr_keys_v1.txt \
+                          ./12.jpg \
+                          0
+# RKNPU2 inference
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+                          ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+                          ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+                          ./ppocr_keys_v1.txt \
+                          ./12.jpg \
+                          1
+```
+
+The above command works for Linux or MacOS. For the SDK in Windows, refer to:
+- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
+
+The visualized result after running is as follows
+
+
+## Other Documents
+
+- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
+- [PPOCR Model Description](../../)
+- [PPOCRv3 Python Deployment](../python)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
new file mode 100644
index 000000000..82860ddc5
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
@@ -0,0 +1,63 @@
+[English](README.md) | 简体中文
+# PPOCRv3 C++部署示例
+
+本目录下提供`infer_static_shape.cc`快速完成PPOCRv3在CPU/RKNPU2上部署的示例。
+
+在部署前,需确认你已经成功完成以下两个操作:
+
+* [正确编译FastDeploy SDK](../../../../../../docs/cn/faq/rknpu2/build.md)
+* [成功转换模型](../README.md)
+
+在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.3以上(x.x.x>=1.0.3),RKNN版本在1.4.1b22以上。
+
+```
+mkdir build
+cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# 下载图片和字典文件
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# 拷贝RKNN模型到build目录
+
+# CPU推理
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+                          ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+                          ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+                          ./ppocr_keys_v1.txt \
+                          ./12.jpg \
+                          0
+# RKNPU推理
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+                          ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+                          ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+                          ./ppocr_keys_v1.txt \
+                          ./12.jpg \
+                          1
+```
+
+运行完成可视化结果如下图所示:
+
+
+结果输出如下:
+
+```text
+det boxes: [[276,174],[285,173],[285,178],[276,179]]rec text: rec score:0.000000 cls label: 1 cls score: 0.766602
+det boxes: [[43,408],[483,390],[483,431],[44,449]]rec text: 上海斯格威铂尔曼大酒店 rec score:0.888450 cls label: 0 cls score: 1.000000
+det boxes: [[186,456],[399,448],[399,480],[186,488]]rec text: 打浦路15号 rec score:0.988769 cls label: 0 cls score: 1.000000
+det boxes: [[18,501],[513,485],[514,537],[18,554]]rec text: 绿洲仕格维花园公寓 rec score:0.992730 cls label: 0 cls score: 1.000000
+det boxes: [[78,553],[404,541],[404,573],[78,585]]rec text: 打浦路252935号 rec score:0.983545 cls label: 0 cls score: 1.000000
+Visualized result saved in ./vis_result.jpg
+```
+
+## 其它文档
+
+- [C++ API查阅](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
+- [PPOCR 系列模型介绍](../../../README_CN.md)
+- [PPOCRv3 Python部署](../python)
+- [模型预测结果说明](../../../../../../docs/api/vision_results/)
+- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
new file mode 100644
index 000000000..7add35688
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
@@ -0,0 +1,126 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+void InitAndInfer(const std::string &det_model_file,
+                  const std::string &cls_model_file,
+                  const std::string &rec_model_file,
+                  const std::string &rec_label_file,
+                  const std::string &image_file,
+                  const fastdeploy::RuntimeOption &option,
+                  const fastdeploy::ModelFormat &format) {
+  auto det_params_file = "";
+  auto cls_params_file = "";
+  auto rec_params_file = "";
+
+  auto det_option = option;
+  auto cls_option = option;
+  auto rec_option = option;
+
+  if (format == fastdeploy::ONNX) {
+    std::cout << "ONNX Model" << std::endl;
+  }
+
+  auto det_model = fastdeploy::vision::ocr::DBDetector(
+      det_model_file, det_params_file, det_option, format);
+  auto cls_model = fastdeploy::vision::ocr::Classifier(
+      cls_model_file, cls_params_file, cls_option, format);
+  auto rec_model = fastdeploy::vision::ocr::Recognizer(
+      rec_model_file, rec_params_file, rec_label_file, rec_option, format);
+
+  if (format == fastdeploy::RKNN) {
+    cls_model.GetPreprocessor().DisableNormalize();
+    cls_model.GetPreprocessor().DisablePermute();
+
+    det_model.GetPreprocessor().DisableNormalize();
+    det_model.GetPreprocessor().DisablePermute();
+
+    rec_model.GetPreprocessor().DisableNormalize();
+    rec_model.GetPreprocessor().DisablePermute();
+  }
+  det_model.GetPreprocessor().SetStaticShapeInfer(true);
+  rec_model.GetPreprocessor().SetStaticShapeInfer(true);
+
+  assert(det_model.Initialized());
+  assert(cls_model.Initialized());
+  assert(rec_model.Initialized());
+
+  // The classification model is optional, so the PP-OCR pipeline can also be
+  // connected in series without it as follows:
+  // auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
+  auto ppocr_v3 =
+      fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
+
+  // When users enable static shape infer for the rec model, the batch size of
+  // the cls and rec models must be set to 1.
+  ppocr_v3.SetClsBatchSize(1);
+  ppocr_v3.SetRecBatchSize(1);
+
+  if (!ppocr_v3.Initialized()) {
+    std::cerr << "Failed to initialize PP-OCR." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+
+  fastdeploy::vision::OCRResult result;
+  if (!ppocr_v3.Predict(im, &result)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << result.Str() << std::endl;
+
+  auto vis_im = fastdeploy::vision::VisOcr(im, result);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char *argv[]) {
+  if (argc < 7) {
+    std::cout << "Usage: infer_static_shape_demo path/to/det_model "
+                 "path/to/cls_model path/to/rec_model path/to/rec_label_file "
+                 "path/to/image run_option, "
+                 "e.g. ./infer_static_shape_demo ./ch_PP-OCRv3_det_infer "
+                 "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
+                 "./ppocr_keys_v1.txt ./12.jpg 0"
+              << std::endl;
+    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+                 "with rknpu2."
+              << std::endl;
+    return -1;
+  }
+
+  fastdeploy::RuntimeOption option;
+  fastdeploy::ModelFormat format;
+  int flag = std::atoi(argv[6]);
+
+  if (flag == 0) {
+    option.UseCpu();
+    format = fastdeploy::ONNX;
+  } else if (flag == 1) {
+    option.UseRKNPU2();
+    format = fastdeploy::RKNN;
+  }
+
+  std::string det_model_dir = argv[1];
+  std::string cls_model_dir = argv[2];
+  std::string rec_model_dir = argv[3];
+  std::string rec_label_file = argv[4];
+  std::string test_image = argv[5];
+  InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file,
+               test_image, option, format);
+  return 0;
+}
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
new file mode 100755
index 000000000..d281daf83
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
@@ -0,0 +1,49 @@
+English | [简体中文](README_CN.md)
+# PPOCRv3 Python Deployment Example
+
+Two steps before deployment
+
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Install the FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+
+This directory provides an example in which `infer_static_shape.py` fast finishes the deployment of PPOCRv3 on CPU/RKNPU2. The script is as follows:
+
+```
+# Download the test image and dictionary file
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# Download the example code for deployment
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/ocr/PP-OCRv3/rknpu2/python/
+
+# CPU inference
+python3 infer_static_shape.py \
+        --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+        --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+        --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+        --rec_label_file ./ppocr_keys_v1.txt \
+        --image 12.jpg \
+        --device cpu
+
+# NPU inference
+python3 infer_static_shape.py \
+        --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+        --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+        --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+        --rec_label_file ppocr_keys_v1.txt \
+        --image 12.jpg \
+        --device npu
+```
+
+The visualized result after running is as follows
+
+
+## Other Documents
+
+- [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
+- [PPOCR Model Description](../../)
+- [PPOCRv3 C++ Deployment](../cpp)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)
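
A note on the `--device npu` path used above: the mean/std values from the conversion yamls are compiled into the RKNN model itself, and the model consumes the image in its original HWC layout, which is why the example scripts call `disable_normalize()` and `disable_permute()` on every preprocessor when the device is `npu`. A minimal sketch of the equivalence (mean/std taken from `tools/rknpu2/config/ppocrv3_det.yaml`; the pixel array is hypothetical, purely for illustration):

```python
import numpy as np

# mean/std from tools/rknpu2/config/ppocrv3_det.yaml
mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
std = np.array([58.395, 57.12, 57.375], dtype=np.float32)

# A hypothetical 1x2 HWC pixel block standing in for a decoded image
img_hwc = np.array([[[120.0, 110.0, 100.0], [240.0, 230.0, 220.0]]],
                   dtype=np.float32)

# What FastDeploy's host-side preprocessing does when it is NOT disabled:
normalized = (img_hwc - mean) / std          # Normalize
chw = np.transpose(normalized, (2, 0, 1))    # HWC -> CHW (permute)

# With disable_normalize()/disable_permute(), the raw HWC image is handed to
# the RKNN model, which applies the same mean/std internally; doing both
# would normalize the input twice and distort every prediction.
print(chw.shape)  # (3, 1, 2)
```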
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
new file mode 100644
index 000000000..663a6b62d
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
@@ -0,0 +1,62 @@
+[English](README.md) | 简体中文
+# PPOCRv3 Python部署示例
+
+在部署前,需确认以下两个步骤
+
+- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+本目录下提供`infer_static_shape.py`快速完成PPOCRv3在CPU/RKNPU2上部署的示例。执行如下脚本即可完成
+
+```
+# 下载模型,图片和字典文件
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+tar -xvf ch_PP-OCRv3_det_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+tar -xvf ch_PP-OCRv3_rec_infer.tar
+
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/ocr/PP-OCRv3/rknpu2/python/
+
+# CPU推理
+python3 infer_static_shape.py \
+        --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+        --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+        --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+        --rec_label_file ./ppocr_keys_v1.txt \
+        --image 12.jpg \
+        --device cpu
+
+# NPU推理
+python3 infer_static_shape.py \
+        --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+        --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+        --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+        --rec_label_file ppocr_keys_v1.txt \
+        --image 12.jpg \
+        --device npu
+```
+
+运行完成可视化结果如下图所示
+
+
+## 其它文档
+
+- [Python API文档查阅](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
+- [PPOCR 系列模型介绍](../../)
+- [PPOCRv3 C++部署](../cpp)
+- [模型预测结果说明](../../../../../../docs/api/vision_results/)
+- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py b/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
new file mode 100755
index 000000000..7aa138217
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
@@ -0,0 +1,144 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--det_model", required=True, help="Path of Detection model of PPOCR.") + parser.add_argument( + "--cls_model", + required=True, + help="Path of Classification model of PPOCR.") + parser.add_argument( + "--rec_model", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--rec_label_file", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.") + parser.add_argument( + "--cpu_thread_num", + type=int, + default=9, + help="Number of threads while inference on CPU.") + return parser.parse_args() + + +def build_option(args): + + det_option = fd.RuntimeOption() + cls_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + if args.device == "npu": + det_option.use_rknpu2() + cls_option.use_rknpu2() + rec_option.use_rknpu2() + + return det_option, cls_option, rec_option + + +def build_format(args): + det_format = fd.ModelFormat.ONNX + cls_format = fd.ModelFormat.ONNX + rec_format = fd.ModelFormat.ONNX + if args.device == "npu": + det_format = fd.ModelFormat.RKNN + cls_format = fd.ModelFormat.RKNN + rec_format = fd.ModelFormat.RKNN + + return det_format, cls_format, rec_format + + +args = parse_arguments() + +# Detection模型, 检测文字框 +det_model_file = args.det_model +det_params_file = "" +# Classification模型,方向分类,可选 +cls_model_file = args.cls_model +cls_params_file = "" +# Recognition模型,文字识别模型 +rec_model_file = args.rec_model +rec_params_file = "" +rec_label_file = args.rec_label_file + +det_option, cls_option, rec_option = build_option(args) +det_format, cls_format, rec_format = build_format(args) + +det_model = fd.vision.ocr.DBDetector( + det_model_file, + det_params_file, + runtime_option=det_option, + model_format=det_format) + +cls_model = fd.vision.ocr.Classifier( + cls_model_file, + cls_params_file, + runtime_option=cls_option, + model_format=cls_format) + +rec_model = fd.vision.ocr.Recognizer( + rec_model_file, + rec_params_file, + rec_label_file, + runtime_option=rec_option, + model_format=rec_format) + +# Det,Rec模型启用静态shape推理 +det_model.preprocessor.static_shape_infer = True +rec_model.preprocessor.static_shape_infer = True + +if args.device == "npu": + det_model.preprocessor.disable_normalize() + det_model.preprocessor.disable_permute() + cls_model.preprocessor.disable_normalize() + cls_model.preprocessor.disable_permute() + rec_model.preprocessor.disable_normalize() + rec_model.preprocessor.disable_permute() + +# 创建PP-OCR,串联3个模型,其中cls_model可选,如无需求,可设置为None +ppocr_v3 = fd.vision.ocr.PPOCRv3( + det_model=det_model, cls_model=cls_model, rec_model=rec_model) + +# Cls模型和Rec模型的batch size 必须设置为1, 开启静态shape推理 +ppocr_v3.cls_batch_size = 1 +ppocr_v3.rec_batch_size = 1 + +# 预测图片准备 +im = cv2.imread(args.image) + +#预测并打印结果 +result = ppocr_v3.predict(im) + +print(result) + +# 可视化结果 +vis_im = fd.vision.vis_ppocr(im, result) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/fastdeploy/vision/ocr/ppocr/classifier.cc b/fastdeploy/vision/ocr/ppocr/classifier.cc old mode 100755 new mode 100644 index 55f355db2..b7dcc502b --- a/fastdeploy/vision/ocr/ppocr/classifier.cc +++ 
b/fastdeploy/vision/ocr/ppocr/classifier.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/ocr/ppocr/classifier.h" + #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" @@ -26,15 +27,16 @@ Classifier::Classifier(const std::string& model_file, const RuntimeOption& custom_option, const ModelFormat& model_format) { if (model_format == ModelFormat::ONNX) { - valid_cpu_backends = {Backend::ORT, - Backend::OPENVINO}; - valid_gpu_backends = {Backend::ORT, Backend::TRT}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; } else { - valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE}; + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE}; valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; } runtime_option = custom_option; runtime_option.model_format = model_format; @@ -54,16 +56,18 @@ bool Classifier::Initialize() { } std::unique_ptr Classifier::Clone() const { - std::unique_ptr clone_model = utils::make_unique(Classifier(*this)); + std::unique_ptr clone_model = + utils::make_unique(Classifier(*this)); clone_model->SetRuntime(clone_model->CloneRuntime()); return clone_model; } -bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score) { +bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, + float* cls_score) { std::vector cls_labels(1); std::vector cls_scores(1); bool success = BatchPredict({img}, &cls_labels, &cls_scores); - if(!success){ + if (!success) { return success; } *cls_label = cls_labels[0]; @@ -72,16 +76,19 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_scor } bool Classifier::BatchPredict(const std::vector& images, - std::vector* cls_labels, std::vector* cls_scores) { + std::vector* cls_labels, + std::vector* cls_scores) { return BatchPredict(images, cls_labels, cls_scores, 0, images.size()); } bool Classifier::BatchPredict(const std::vector& images, - std::vector* cls_labels, std::vector* cls_scores, + std::vector* cls_labels, + std::vector* cls_scores, size_t start_index, size_t end_index) { size_t total_size = images.size(); std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } @@ -91,13 +98,15 @@ bool Classifier::BatchPredict(const std::vector& images, return false; } - if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, start_index, total_size)) { - FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl; + if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, + start_index, total_size)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." 
+            << std::endl;
     return false;
   }
   return true;
 }
 
-}  // namesapce ocr
+}  // namespace ocr
 }  // namespace vision
 }  // namespace fastdeploy
diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
old mode 100755
new mode 100644
index dcd76c168..35f98acc9
--- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
@@ -13,9 +13,10 @@
 // limitations under the License.
 
 #include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h"
+
+#include "fastdeploy/function/concat.h"
 #include "fastdeploy/utils/perf.h"
 #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
-#include "fastdeploy/function/concat.h"
 
 namespace fastdeploy {
 namespace vision {
@@ -38,34 +39,43 @@ void OcrClassifierResizeImage(FDMat* mat,
   Resize::Run(mat, resize_w, img_h);
 }
 
-bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) {
+bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
+                                 std::vector<FDTensor>* outputs) {
   return Run(images, outputs, 0, images->size());
 }
 
-bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
+bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
+                                 std::vector<FDTensor>* outputs,
                                  size_t start_index, size_t end_index) {
-
-  if (images->size() == 0 || start_index <0 || end_index <= start_index || end_index > images->size()) {
-    FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl;
+  if (images->size() == 0 || start_index < 0 || end_index <= start_index ||
+      end_index > images->size()) {
+    FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
+               "end_index <= images->size()"
+            << std::endl;
     return false;
   }
 
   for (size_t i = start_index; i < end_index; ++i) {
     FDMat* mat = &(images->at(i));
     OcrClassifierResizeImage(mat, cls_image_shape_);
-    Normalize::Run(mat, mean_, scale_, is_scale_);
+    if (!disable_normalize_) {
+      Normalize::Run(mat, mean_, scale_, is_scale_);
+    }
     std::vector<float> value = {0, 0, 0};
     if (mat->Width() < cls_image_shape_[2]) {
       Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
     }
-    HWC2CHW::Run(mat);
-    Cast::Run(mat, "float");
+
+    if (!disable_permute_) {
+      HWC2CHW::Run(mat);
+      Cast::Run(mat, "float");
+    }
   }
   // Only have 1 output Tensor.
   outputs->resize(1);
   // Concat all the preprocessed data to a batch tensor
   size_t tensor_size = end_index - start_index;
-  std::vector<FDTensor> tensors(tensor_size);
+  std::vector<FDTensor> tensors(tensor_size);
   for (size_t i = 0; i < tensor_size; ++i) {
     (*images)[i + start_index].ShareWithTensor(&(tensors[i]));
     tensors[i].ExpandDim(0);
diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
index 52b2bb737..921f3f826 100644
--- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
@@ -56,7 +56,16 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
   /// Get cls_image_shape for the classification preprocess
   std::vector<int> GetClsImageShape() const { return cls_image_shape_; }
 
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
  private:
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
   std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
   bool is_scale_ = true;
diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
index 7dd0ac84a..914b952f2 100644
--- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc
+++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
@@ -36,6 +36,7 @@ DBDetector::DBDetector(const std::string& model_file,
     valid_kunlunxin_backends = {Backend::LITE};
     valid_ascend_backends = {Backend::LITE};
     valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+    valid_rknpu_backends = {Backend::RKNPU2};
   }
 
   runtime_option = custom_option;
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
index 69687d5cd..06f47b6ef 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
@@ -20,9 +20,13 @@ namespace fastdeploy {
 namespace vision {
 namespace ocr {
 
-std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len) {
+std::array<int, 4> DBDetectorPreprocessor::OcrDetectorGetInfo(
+    FDMat* img, int max_size_len) {
   int w = img->Width();
   int h = img->Height();
+  if (static_shape_infer_) {
+    return {w, h, det_image_shape_[2], det_image_shape_[1]};
+  }
 
   float ratio = 1.f;
   int max_wh = w >= h ? w : h;
@@ -86,7 +90,10 @@ bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch,
     ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
                 max_resize_w, max_resize_h);
   }
-  (*normalize_permute_op_)(image_batch);
+
+  if (!disable_normalize_ && !disable_permute_) {
+    (*normalize_permute_op_)(image_batch);
+  }
 
   outputs->resize(1);
   FDTensor* tensor = image_batch->Tensor();
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
index fd7b77de1..32ef80011 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
@@ -59,14 +59,44 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager {
     return &batch_det_img_info_;
   }
 
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+  /// Set det_image_shape for the detection preprocess.
+  /// This api is usually used when you retrain the model.
+  /// Generally, you do not need to use it.
+  void SetDetImageShape(const std::vector<int>& det_image_shape) {
+    det_image_shape_ = det_image_shape;
+  }
+  /// Get det_image_shape for the detection preprocess
+  std::vector<int> GetDetImageShape() const { return det_image_shape_; }
+
+  /// Set static_shape_infer is true or not. When deploying PP-OCR
+  /// on hardware which can not support dynamic input shape very well,
+  /// like Huawei Ascend, static_shape_infer needs to be true.
+ void SetStaticShapeInfer(bool static_shape_infer) { + static_shape_infer_ = static_shape_infer; + } + /// Get static_shape_infer of the recognition preprocess + bool GetStaticShapeInfer() const { return static_shape_infer_; } + private: bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w, int max_resize_h); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; int max_side_len_ = 960; std::vector> batch_det_img_info_; std::shared_ptr resize_op_; std::shared_ptr pad_op_; std::shared_ptr normalize_permute_op_; + std::vector det_image_shape_ = {3, 960, 960}; + bool static_shape_infer_ = false; + std::array OcrDetectorGetInfo(FDMat* img, int max_size_len); }; } // namespace ocr diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc index aa77542af..a1ebd09c6 100644 --- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -26,6 +26,9 @@ void BindPPOCRModel(pybind11::module& m) { pybind11::class_( m, "DBDetectorPreprocessor") .def(pybind11::init<>()) + .def_property("static_shape_infer", + &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer, + &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer) .def_property("max_side_len", &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) @@ -33,19 +36,27 @@ void BindPPOCRModel(pybind11::module& m) { [](vision::ocr::DBDetectorPreprocessor& self, const std::vector& mean, const std::vector& std, bool is_scale) { self.SetNormalize(mean, std, is_scale); }) - .def("run", [](vision::ocr::DBDetectorPreprocessor& self, - std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } - std::vector outputs; - self.Run(&images, &outputs); - auto batch_det_img_info = self.GetBatchImgInfo(); - for (size_t i = 0; i < outputs.size(); ++i) { - outputs[i].StopSharing(); - } - return std::make_pair(outputs, *batch_det_img_info); + .def("run", + [](vision::ocr::DBDetectorPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + self.Run(&images, &outputs); + auto batch_det_img_info = self.GetBatchImgInfo(); + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return std::make_pair(outputs, *batch_det_img_info); + }) + .def("disable_normalize", + [](vision::ocr::DBDetectorPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) { + self.DisablePermute(); }); pybind11::class_( @@ -135,21 +146,30 @@ void BindPPOCRModel(pybind11::module& m) { .def_property("is_scale", &vision::ocr::ClassifierPreprocessor::GetIsScale, &vision::ocr::ClassifierPreprocessor::SetIsScale) - .def("run", [](vision::ocr::ClassifierPreprocessor& self, - std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } - std::vector outputs; - if (!self.Run(&images, &outputs)) { - throw std::runtime_error( - "Failed to preprocess the input data in ClassifierPreprocessor."); - } - for (size_t i = 0; i < outputs.size(); ++i) { - outputs[i].StopSharing(); - } - return outputs; + 
.def("run", + [](vision::ocr::ClassifierPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "ClassifierPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::ClassifierPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) { + self.DisablePermute(); }); pybind11::class_( @@ -229,21 +249,30 @@ void BindPPOCRModel(pybind11::module& m) { .def_property("is_scale", &vision::ocr::RecognizerPreprocessor::GetIsScale, &vision::ocr::RecognizerPreprocessor::SetIsScale) - .def("run", [](vision::ocr::RecognizerPreprocessor& self, - std::vector& im_list) { - std::vector images; - for (size_t i = 0; i < im_list.size(); ++i) { - images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); - } - std::vector outputs; - if (!self.Run(&images, &outputs)) { - throw std::runtime_error( - "Failed to preprocess the input data in RecognizerPreprocessor."); - } - for (size_t i = 0; i < outputs.size(); ++i) { - outputs[i].StopSharing(); - } - return outputs; + .def("run", + [](vision::ocr::RecognizerPreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "RecognizerPreprocessor."); + } + for (size_t i = 0; i < outputs.size(); ++i) { + outputs[i].StopSharing(); + } + return outputs; + }) + .def("disable_normalize", + [](vision::ocr::RecognizerPreprocessor& self) { + self.DisableNormalize(); + }) + .def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) { + self.DisablePermute(); }); pybind11::class_( diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc index ad049fdce..59c7de279 100644 --- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc +++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc @@ -13,22 +13,23 @@ // limitations under the License. 
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h" + +#include "fastdeploy/function/concat.h" #include "fastdeploy/utils/perf.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" -#include "fastdeploy/function/concat.h" namespace fastdeploy { namespace vision { namespace ocr { void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, - const std::vector& rec_image_shape, bool static_shape_infer) { + const std::vector& rec_image_shape, + bool static_shape_infer) { int img_h, img_w; img_h = rec_image_shape[1]; img_w = rec_image_shape[2]; if (!static_shape_infer) { - img_w = int(img_h * max_wh_ratio); float ratio = float(mat->Width()) / float(mat->Height()); @@ -43,23 +44,29 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, } else { if (mat->Width() >= img_w) { - Resize::Run(mat, img_w, img_h); // Reszie W to 320 + Resize::Run(mat, img_w, img_h); // Reszie W to 320 } else { Resize::Run(mat, mat->Width(), img_h); Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127}); // Pad to 320 - } + } } } -bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs) { +bool RecognizerPreprocessor::Run(std::vector* images, + std::vector* outputs) { return Run(images, outputs, 0, images->size(), {}); } -bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs, - size_t start_index, size_t end_index, const std::vector& indices) { - if (images->size() == 0 || end_index <= start_index || end_index > images->size()) { - FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl; +bool RecognizerPreprocessor::Run(std::vector* images, + std::vector* outputs, + size_t start_index, size_t end_index, + const std::vector& indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; return false; } @@ -67,7 +74,7 @@ bool RecognizerPreprocessor::Run(std::vector* images, std::vector* images, std::vectorat(real_index)); - OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_infer_); - NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); + OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, + static_shape_infer_); + if (!disable_normalize_ && !disable_permute_) { + NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); + } else { + if (!disable_normalize_) { + Normalize::Run(mat, mean_, scale_, is_scale_); + } + if (!disable_permute_) { + HWC2CHW::Run(mat); + Cast::Run(mat, "float"); + } + } } // Only have 1 output Tensor. 
outputs->resize(1);
-  size_t tensor_size = end_index-start_index;
+  size_t tensor_size = end_index - start_index;
   // Concat all the preprocessed data to a batch tensor
-  std::vector<FDTensor> tensors(tensor_size);
+  std::vector<FDTensor> tensors(tensor_size);
   for (size_t i = 0; i < tensor_size; ++i) {
     size_t real_index = i + start_index;
     if (indices.size() != 0) {
       real_index = indices[i + start_index];
     }
-
+
     (*images)[real_index].ShareWithTensor(&(tensors[i]));
     tensors[i].ExpandDim(0);
   }
diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
index f7d741b5d..c5edb2a80 100644
--- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
@@ -66,7 +66,16 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
   /// Get rec_image_shape for the recognition preprocess
   std::vector<int> GetRecImageShape() { return rec_image_shape_; }
 
+  /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+  /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
  private:
+  // for recording the switch of hwc2chw
+  bool disable_permute_ = false;
+  // for recording the switch of normalize
+  bool disable_normalize_ = false;
   std::vector<int> rec_image_shape_ = {3, 48, 320};
   std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.cc b/fastdeploy/vision/ocr/ppocr/recognizer.cc
old mode 100755
new mode 100644
index 69e75b281..74a8a26a1
--- a/fastdeploy/vision/ocr/ppocr/recognizer.cc
+++ b/fastdeploy/vision/ocr/ppocr/recognizer.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "fastdeploy/vision/ocr/ppocr/recognizer.h"
+
 #include "fastdeploy/utils/perf.h"
 #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
 
@@ -26,17 +27,19 @@ Recognizer::Recognizer(const std::string& model_file,
                        const std::string& params_file,
                        const std::string& label_path,
                        const RuntimeOption& custom_option,
-                       const ModelFormat& model_format):postprocessor_(label_path) {
+                       const ModelFormat& model_format)
+    : postprocessor_(label_path) {
   if (model_format == ModelFormat::ONNX) {
-    valid_cpu_backends = {Backend::ORT,
-                          Backend::OPENVINO};
-    valid_gpu_backends = {Backend::ORT, Backend::TRT};
+    valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+    valid_gpu_backends = {Backend::ORT, Backend::TRT};
   } else {
-    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
+                          Backend::LITE};
     valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
     valid_kunlunxin_backends = {Backend::LITE};
-    valid_ascend_backends = {Backend::LITE};
+    valid_ascend_backends = {Backend::LITE};
     valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+    valid_rknpu_backends = {Backend::RKNPU2};
   }
 
   runtime_option = custom_option;
@@ -57,12 +60,14 @@ bool Recognizer::Initialize() {
 }
 
 std::unique_ptr<Recognizer> Recognizer::Clone() const {
-  std::unique_ptr<Recognizer> clone_model = utils::make_unique<Recognizer>(Recognizer(*this));
+  std::unique_ptr<Recognizer> clone_model =
+      utils::make_unique<Recognizer>(Recognizer(*this));
   clone_model->SetRuntime(clone_model->CloneRuntime());
   return clone_model;
 }
 
-bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score) {
+bool Recognizer::Predict(const cv::Mat& img, std::string* text,
+                         float* rec_score) {
   std::vector<std::string> texts(1);
   std::vector<float> rec_scores(1);
   bool success = BatchPredict({img}, &texts,
&rec_scores); @@ -75,20 +80,24 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score } bool Recognizer::BatchPredict(const std::vector& images, - std::vector* texts, std::vector* rec_scores) { + std::vector* texts, + std::vector* rec_scores) { return BatchPredict(images, texts, rec_scores, 0, images.size(), {}); } bool Recognizer::BatchPredict(const std::vector& images, - std::vector* texts, std::vector* rec_scores, - size_t start_index, size_t end_index, const std::vector& indices) { + std::vector* texts, + std::vector* rec_scores, + size_t start_index, size_t end_index, + const std::vector& indices) { size_t total_size = images.size(); if (indices.size() != 0 && indices.size() != total_size) { FDERROR << "indices.size() should be 0 or images.size()." << std::endl; return false; } std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index, indices)) { + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, + end_index, indices)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } @@ -99,13 +108,15 @@ bool Recognizer::BatchPredict(const std::vector& images, return false; } - if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, start_index, total_size, indices)) { - FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl; + if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, + start_index, total_size, indices)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; return false; } return true; } -} // namesapce ocr +} // namespace ocr } // namespace vision } // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/vision/visualize/ocr.cc b/fastdeploy/vision/visualize/ocr.cc index 4946e08e7..5d0bb9e1b 100644 --- a/fastdeploy/vision/visualize/ocr.cc +++ b/fastdeploy/vision/visualize/ocr.cc @@ -17,10 +17,14 @@ namespace fastdeploy { namespace vision { -cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { +cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result, + const float score_threshold) { auto vis_im = im.clone(); for (int n = 0; n < ocr_result.boxes.size(); n++) { + if (ocr_result.rec_scores[n] < score_threshold) { + continue; + } cv::Point rook_points[4]; for (int m = 0; m < 4; m++) { @@ -28,7 +32,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { int(ocr_result.boxes[n][m * 2 + 1])); } - const cv::Point *ppt[1] = {rook_points}; + const cv::Point* ppt[1] = {rook_points}; int npt[] = {4}; cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } @@ -36,7 +40,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { return vis_im; } -cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { +cv::Mat Visualize::VisOcr(const cv::Mat& im, const OCRResult& ocr_result) { FDWARNING << "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, " "please use fastdeploy::vision:VisOcr function instead." 
@@ -51,7 +55,7 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { int(ocr_result.boxes[n][m * 2 + 1])); } - const cv::Point *ppt[1] = {rook_points}; + const cv::Point* ppt[1] = {rook_points}; int npt[] = {4}; cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); } diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h index 4614dc4e1..f382818c6 100755 --- a/fastdeploy/vision/visualize/visualize.h +++ b/fastdeploy/vision/visualize/visualize.h @@ -15,8 +15,8 @@ #pragma once #include "fastdeploy/vision/common/result.h" -#include "opencv2/imgproc/imgproc.hpp" #include "fastdeploy/vision/tracking/pptracking/model.h" +#include "opencv2/imgproc/imgproc.hpp" namespace fastdeploy { /** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace @@ -41,9 +41,10 @@ class FASTDEPLOY_DECL Visualize { bool remove_small_connected_area = false); static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred, float threshold); - static cv::Mat SwapBackgroundMatting( - const cv::Mat& im, const cv::Mat& background, const MattingResult& result, - bool remove_small_connected_area = false); + static cv::Mat + SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background, + const MattingResult& result, + bool remove_small_connected_area = false); static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im, const cv::Mat& background, int background_label, @@ -90,9 +91,11 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im, * \param[in] font_size font size * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisClassification( - const cv::Mat& im, const ClassifyResult& result, int top_k = 5, - float score_threshold = 0.0f, float font_size = 0.5f); +FASTDEPLOY_DECL cv::Mat VisClassification(const cv::Mat& im, + const ClassifyResult& result, + int top_k = 5, + float score_threshold = 0.0f, + float font_size = 0.5f); /** \brief Show the visualized results with custom labels for classification models * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format @@ -103,10 +106,10 @@ FASTDEPLOY_DECL cv::Mat VisClassification( * \param[in] font_size font size * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisClassification( - const cv::Mat& im, const ClassifyResult& result, - const std::vector& labels, int top_k = 5, - float score_threshold = 0.0f, float font_size = 0.5f); +FASTDEPLOY_DECL cv::Mat +VisClassification(const cv::Mat& im, const ClassifyResult& result, + const std::vector& labels, int top_k = 5, + float score_threshold = 0.0f, float font_size = 0.5f); /** \brief Show the visualized results for face detection models * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format @@ -159,7 +162,8 @@ FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im, * \param[in] result the result produced by model * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result); +FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result, + const float score_threshold = 0); FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results, float score_threshold = 0.0f, @@ -172,10 +176,10 @@ FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results, * \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result 
will not include the small connected areas * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im, - const cv::Mat& background, - const MattingResult& result, - bool remove_small_connected_area = false); +FASTDEPLOY_DECL cv::Mat +SwapBackground(const cv::Mat& im, const cv::Mat& background, + const MattingResult& result, + bool remove_small_connected_area = false); /** \brief Swap the image background with SegmentationResult * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format @@ -196,12 +200,11 @@ FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im, * \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold * \return cv::Mat type stores the visualized results */ -FASTDEPLOY_DECL cv::Mat VisKeypointDetection(const cv::Mat& im, - const KeyPointDetectionResult& results, - float conf_threshold = 0.5f); +FASTDEPLOY_DECL cv::Mat +VisKeypointDetection(const cv::Mat& im, const KeyPointDetectionResult& results, + float conf_threshold = 0.5f); FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im, - const HeadPoseResult& result, - int size = 50, + const HeadPoseResult& result, int size = 50, int line_size = 1); } // namespace vision diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py index 403f70aa7..1fa39600b 100755 --- a/python/fastdeploy/vision/ocr/ppocr/__init__.py +++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py @@ -65,6 +65,29 @@ class DBDetectorPreprocessor: """ self._preprocessor.set_normalize(mean, std, is_scale) + @property + def static_shape_infer(self): + return self._preprocessor.static_shape_infer + + @static_shape_infer.setter + def static_shape_infer(self, value): + assert isinstance( + value, + bool), "The value to set `static_shape_infer` must be type of bool." + self._preprocessor.static_shape_infer = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class DBDetectorPostprocessor: def __init__(self): @@ -358,6 +381,18 @@ class ClassifierPreprocessor: list), "The value to set `cls_image_shape` must be type of list." self._preprocessor.cls_image_shape = value + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. + """ + self._preprocessor.disable_permute() + class ClassifierPostprocessor: def __init__(self): @@ -581,6 +616,18 @@ class RecognizerPreprocessor: list), "The value to set `rec_image_shape` must be type of list." self._preprocessor.rec_image_shape = value + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self._preprocessor.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self._preprocessor.disable_permute() + class RecognizerPostprocessor: def __init__(self, label_path): diff --git a/tools/rknpu2/config/ppocrv3_cls.yaml b/tools/rknpu2/config/ppocrv3_cls.yaml new file mode 100644 index 000000000..197becc2f --- /dev/null +++ b/tools/rknpu2/config/ppocrv3_cls.yaml @@ -0,0 +1,15 @@ +mean: + - + - 127.5 + - 127.5 + - 127.5 +std: + - + - 127.5 + - 127.5 + - 127.5 +model_path: ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx +outputs_nodes: +do_quantization: False +dataset: +output_folder: "./ch_ppocr_mobile_v2.0_cls_infer" diff --git a/tools/rknpu2/config/ppocrv3_det.yaml b/tools/rknpu2/config/ppocrv3_det.yaml new file mode 100644 index 000000000..2897c5f74 --- /dev/null +++ b/tools/rknpu2/config/ppocrv3_det.yaml @@ -0,0 +1,15 @@ +mean: + - + - 123.675 + - 116.28 + - 103.53 +std: + - + - 58.395 + - 57.12 + - 57.375 +model_path: ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx +outputs_nodes: +do_quantization: False +dataset: +output_folder: "./ch_PP-OCRv3_det_infer" diff --git a/tools/rknpu2/config/ppocrv3_rec.yaml b/tools/rknpu2/config/ppocrv3_rec.yaml new file mode 100644 index 000000000..8a22a39a2 --- /dev/null +++ b/tools/rknpu2/config/ppocrv3_rec.yaml @@ -0,0 +1,15 @@ +mean: + - + - 127.5 + - 127.5 + - 127.5 +std: + - + - 127.5 + - 127.5 + - 127.5 +model_path: ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx +outputs_nodes: +do_quantization: False +dataset: +output_folder: "./ch_PP-OCRv3_rec_infer" diff --git a/tools/rknpu2/export.py b/tools/rknpu2/export.py index c42a1eade..a94b34885 100644 --- a/tools/rknpu2/export.py +++ b/tools/rknpu2/export.py @@ -65,7 +65,10 @@ if __name__ == "__main__": if not os.path.exists(yaml_config["output_folder"]): os.mkdir(yaml_config["output_folder"]) - model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0] + name_list = os.path.basename(yaml_config["model_path"]).split(".") + model_base_name = "" + for name in name_list[0:-1]: + model_base_name += name model_device_name = config.target_platform.lower() if yaml_config["do_quantization"]: model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn"