From 8c3ccc2cc20b6caf07b214fd89a78fbbfbf7f6ff Mon Sep 17 00:00:00 2001
From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Mon, 27 Feb 2023 15:01:17 +0800
Subject: [PATCH] [Hackathon 182 Model] Update PPOCRV3 For RKNPU2 (#1403)
* update ppocrv3 for rknpu2
* add config
* add config
* delete unused files
* update useful results
* Repair note
* Repair note
* fixed bugs
* update
---
FastDeploy.cmake.in | 4 +-
cmake/rknpu2.cmake | 11 +-
examples/vision/ocr/PP-OCRv3/rknpu2/README.md | 77 ++++++++++
.../ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt | 14 ++
.../vision/ocr/PP-OCRv3/rknpu2/cpp/README.md | 55 +++++++
.../ocr/PP-OCRv3/rknpu2/cpp/README_CN.md | 63 ++++++++
.../PP-OCRv3/rknpu2/cpp/infer_static_shape.cc | 126 +++++++++++++++
.../ocr/PP-OCRv3/rknpu2/python/README.md | 49 ++++++
.../ocr/PP-OCRv3/rknpu2/python/README_CN.md | 62 ++++++++
.../rknpu2/python/infer_static_shape.py | 144 ++++++++++++++++++
fastdeploy/vision/ocr/ppocr/classifier.cc | 35 +++--
.../vision/ocr/ppocr/cls_preprocessor.cc | 30 ++--
.../vision/ocr/ppocr/cls_preprocessor.h | 9 ++
fastdeploy/vision/ocr/ppocr/dbdetector.cc | 1 +
.../vision/ocr/ppocr/det_preprocessor.cc | 11 +-
.../vision/ocr/ppocr/det_preprocessor.h | 30 ++++
.../vision/ocr/ppocr/ocrmodel_pybind.cc | 115 ++++++++------
.../vision/ocr/ppocr/rec_preprocessor.cc | 50 ++++--
.../vision/ocr/ppocr/rec_preprocessor.h | 9 ++
fastdeploy/vision/ocr/ppocr/recognizer.cc | 41 +++--
fastdeploy/vision/visualize/ocr.cc | 12 +-
fastdeploy/vision/visualize/visualize.h | 45 +++---
.../fastdeploy/vision/ocr/ppocr/__init__.py | 47 ++++++
tools/rknpu2/config/ppocrv3_cls.yaml | 15 ++
tools/rknpu2/config/ppocrv3_det.yaml | 15 ++
tools/rknpu2/config/ppocrv3_rec.yaml | 15 ++
tools/rknpu2/export.py | 5 +-
27 files changed, 958 insertions(+), 132 deletions(-)
create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/README.md
create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
create mode 100644 examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
create mode 100755 examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/classifier.cc
mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
mode change 100755 => 100644 fastdeploy/vision/ocr/ppocr/recognizer.cc
create mode 100644 tools/rknpu2/config/ppocrv3_cls.yaml
create mode 100644 tools/rknpu2/config/ppocrv3_det.yaml
create mode 100644 tools/rknpu2/config/ppocrv3_rec.yaml
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index 5c2c5b733..f5c2dbe83 100644
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -131,9 +131,9 @@ endif()
if(ENABLE_RKNPU2_BACKEND)
if(RKNN2_TARGET_SOC STREQUAL "RK356X")
- set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
+ set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
- set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
+ set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
else ()
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588")
endif()
diff --git a/cmake/rknpu2.cmake b/cmake/rknpu2.cmake
index 7f11b0bfb..3d93a364f 100644
--- a/cmake/rknpu2.cmake
+++ b/cmake/rknpu2.cmake
@@ -1,7 +1,7 @@
# get RKNPU2_URL
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-set(RKNPU2_VERSION "1.4.0")
-set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
+set(RKNPU2_VERSION "1.4.2b0")
+set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz")
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
# download_and_decompress
@@ -10,11 +10,12 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE}
# set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
+# set the include directory and runtime library
if (EXISTS ${RKNPU_RUNTIME_PATH})
- set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
- include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
+ set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so)
+ include_directories(${RKNPU_RUNTIME_PATH}/include)
else ()
- message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error")
+ message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.")
endif ()
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/README.md
new file mode 100644
index 000000000..06ba9fc6e
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/README.md
@@ -0,0 +1,77 @@
+# PaddleOCR Model Deployment
+
+## PaddleOCR is a pipeline of multiple models, consisting of
+
+* Text detection: `DBDetector`
+* [Optional] Orientation classification: `Classifier`, which corrects the image orientation before text recognition
+* Text recognition: `Recognizer`, which recognizes the text in the image
+
+FastDeploy provides the following OCR deployments for different scenarios. Download the 3 models plus the dictionary file (or 2 models, since the classifier is optional) to run the complete OCR pipeline.
+
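+A minimal sketch of how the three models are chained with the FastDeploy Python API (the model paths here are placeholders; the complete runnable example lives under `python/`):
+
+```python
+import fastdeploy as fd
+import cv2
+
+fmt = fd.ModelFormat.ONNX  # use fd.ModelFormat.RKNN for converted RKNN models
+det = fd.vision.ocr.DBDetector("det.onnx", "", model_format=fmt)
+cls = fd.vision.ocr.Classifier("cls.onnx", "", model_format=fmt)
+rec = fd.vision.ocr.Recognizer("rec.onnx", "", "ppocr_keys_v1.txt", model_format=fmt)
+
+# The classifier is optional; cls_model may be None.
+ppocr = fd.vision.ocr.PPOCRv3(det_model=det, cls_model=cls, rec_model=rec)
+print(ppocr.predict(cv2.imread("12.jpg")))
+```
+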
+## PP-OCR Chinese/English Series Models
+
+The download links in the table below are provided by the PaddleOCR model zoo; see the [PP-OCR series model list](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md) for details.
+
+| OCR Version | Text Detection | Orientation Classification | Text Recognition | Dictionary | Notes |
+|:------------|:---------------|:---------------------------|:-----------------|:-----------|:------|
+| ch_PP-OCRv3 [recommended] | [ch_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | Original ultra-lightweight OCRv3 models; support Chinese/English and multilingual text detection |
+| en_PP-OCRv3 [recommended] | [en_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [en_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) | [en_dict.txt](https://bj.bcebos.com/paddlehub/fastdeploy/en_dict.txt) | Original ultra-lightweight OCRv3 models; support English and digit recognition; identical to the Chinese models except for the training data of the detection and recognition models |
+| ch_PP-OCRv2 | [ch_PP-OCRv2_det](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv2_rec](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | Original ultra-lightweight OCRv2 models; support Chinese/English and multilingual text detection |
+| ch_PP-OCRv2_mobile | [ch_ppocr_mobile_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_mobile_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | Original ultra-lightweight OCRv2 models; support Chinese/English and multilingual text detection; lighter than PP-OCRv2 |
+| ch_PP-OCRv2_server | [ch_ppocr_server_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_server_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2 server-grade models; support Chinese/English and multilingual text detection; larger than the ultra-lightweight models but more accurate |
+
+## Model Conversion
+
+To run PPOCR on RKNPU2, the Paddle static-graph models must be converted to RKNN models.
+
+### Converting static-graph models to RKNN format
+
+rknn_toolkit2 does not support converting Paddle static-graph models to RKNN directly, so we first export the Paddle models to ONNX and then convert the ONNX models to RKNN.
+
+```bash
+# Download the models and the dictionary file
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+tar -xvf ch_PP-OCRv3_det_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+tar -xvf ch_PP-OCRv3_rec_infer.tar
+
+# Convert the models to ONNX format
+paddle2onnx --model_dir ch_PP-OCRv3_det_infer \
+ --model_filename inference.pdmodel \
+ --params_filename inference.pdiparams \
+ --save_file ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ --enable_dev_version True
+paddle2onnx --model_dir ch_ppocr_mobile_v2.0_cls_infer \
+ --model_filename inference.pdmodel \
+ --params_filename inference.pdiparams \
+ --save_file ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ --enable_dev_version True
+paddle2onnx --model_dir ch_PP-OCRv3_rec_infer \
+ --model_filename inference.pdmodel \
+ --params_filename inference.pdiparams \
+ --save_file ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ --enable_dev_version True
+
+# Fix the models' input shapes
+python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ --output_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ --input_shape_dict "{'x':[1,3,960,960]}"
+python -m paddle2onnx.optimize --input_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ --output_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ --input_shape_dict "{'x':[1,3,48,192]}"
+python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ --output_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ --input_shape_dict "{'x':[1,3,48,320]}"
+
+# Convert the ONNX models to RKNN models
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_det.yaml \
+ --target_platform rk3588
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_rec.yaml \
+ --target_platform rk3588
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_cls.yaml \
+ --target_platform rk3588
+```
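+
+The RKNN file names are derived by `tools/rknpu2/export.py` from the input name: the dot-separated parts before the extension are joined, so dots inside version strings disappear (e.g. `ch_ppocr_mobile_v2.0_cls_infer` becomes `ch_ppocr_mobile_v20_cls_infer`). A minimal sketch of that naming logic, assuming the non-quantized branch of `export.py` mirrors the quantized one:
+
+```python
+import os
+
+def rknn_output_name(model_path, target_platform, quantized=False):
+    # Join the dot-separated parts of the base name, dropping the dots,
+    # as export.py does before appending the platform and suffix.
+    parts = os.path.basename(model_path).split(".")
+    base = "".join(parts[:-1])
+    suffix = "quantized" if quantized else "unquantized"
+    return base + "_" + target_platform.lower() + "_" + suffix + ".rknn"
+
+print(rknn_output_name("ch_ppocr_mobile_v2.0_cls_infer.onnx", "rk3588"))
+# ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn
+```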
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
new file mode 100644
index 000000000..9538fea6b
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+# Path to the downloaded and extracted FastDeploy SDK
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# Add FastDeploy header dependencies
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc)
+# Link against the FastDeploy libraries
+target_link_libraries(infer_static_shape_demo ${FASTDEPLOY_LIBS})
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
new file mode 100755
index 000000000..af5be5360
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
@@ -0,0 +1,55 @@
+English | [简体中文](README_CN.md)
+# PPOCRv3 C++ Deployment Example
+
+This directory provides `infer_static_shape.cc` to quickly complete the deployment of PPOCRv3 on CPU (with ONNX models) and RKNPU2 (with RKNN models).
+
+Two steps before deployment
+
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+
+Run the following commands in this directory to complete the compilation test. This model requires FastDeploy 1.0.3 or above (x.x.x > 1.0.3) and RKNN 1.4.1b22 or above.
+
+```
+mkdir build
+cd build
+# Download the FastDeploy precompiled library. Users can choose the appropriate version from the `FastDeploy Precompiled Library` mentioned above
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+
+# Download the test image and the dictionary file
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# CPU inference
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ ./ppocr_keys_v1.txt \
+ ./12.jpg \
+ 0
+# RKNPU2 inference
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+ ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+ ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+ ./ppocr_keys_v1.txt \
+ ./12.jpg \
+ 1
+```
+
+The above commands work on Linux or macOS. For using the SDK on Windows, refer to:
+- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
+
+The visualized result after running is as follows
+
+
+
+## Other Documents
+
+- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
+- [PPOCR Model Description](../../)
+- [PPOCRv3 Python Deployment](../python)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
new file mode 100644
index 000000000..82860ddc5
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
@@ -0,0 +1,63 @@
+[English](README.md) | 简体中文
+# PPOCRv3 C++ Deployment Example
+
+This directory provides `infer_static_shape.cc` to quickly complete the deployment of PPOCRv3 on CPU and RKNPU2.
+
+Before deployment, make sure you have completed the following two steps:
+
+* [Build the FastDeploy SDK correctly](../../../../../../docs/cn/faq/rknpu2/build.md).
+* [Convert the models successfully](../README.md).
+
+Run the following commands in this directory to complete the compilation test. This model requires FastDeploy 1.0.3 or above (x.x.x > 1.0.3) and RKNN 1.4.1b22 or above.
+
+```
+mkdir build
+cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# Download the test image and the dictionary file
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+
+# Copy the RKNN models to the build directory
+
+# CPU inference
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ ./ppocr_keys_v1.txt \
+ ./12.jpg \
+ 0
+# RKNPU2 inference
+./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+ ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+ ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+ ./ppocr_keys_v1.txt \
+ ./12.jpg \
+ 1
+```
+
+The visualized result after running is shown below:
+
+
+
+The output is as follows:
+
+```text
+det boxes: [[276,174],[285,173],[285,178],[276,179]]rec text: rec score:0.000000 cls label: 1 cls score: 0.766602
+det boxes: [[43,408],[483,390],[483,431],[44,449]]rec text: 上海斯格威铂尔曼大酒店 rec score:0.888450 cls label: 0 cls score: 1.000000
+det boxes: [[186,456],[399,448],[399,480],[186,488]]rec text: 打浦路15号 rec score:0.988769 cls label: 0 cls score: 1.000000
+det boxes: [[18,501],[513,485],[514,537],[18,554]]rec text: 绿洲仕格维花园公寓 rec score:0.992730 cls label: 0 cls score: 1.000000
+det boxes: [[78,553],[404,541],[404,573],[78,585]]rec text: 打浦路252935号 rec score:0.983545 cls label: 0 cls score: 1.000000
+Visualized result saved in ./vis_result.jpg
+```
+
+
+## Other Documents
+
+- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
+- [PPOCR Model Description](../../../README_CN.md)
+- [PPOCRv3 Python Deployment](../python)
+- [How to switch the model inference backend](../../../../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
new file mode 100644
index 000000000..7add35688
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
@@ -0,0 +1,126 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+void InitAndInfer(const std::string &det_model_file,
+ const std::string &cls_model_file,
+ const std::string &rec_model_file,
+ const std::string &rec_label_file,
+ const std::string &image_file,
+ const fastdeploy::RuntimeOption &option,
+ const fastdeploy::ModelFormat &format) {
+ auto det_params_file = "";
+ auto cls_params_file = "";
+ auto rec_params_file = "";
+
+ auto det_option = option;
+ auto cls_option = option;
+ auto rec_option = option;
+
+ if (format == fastdeploy::ONNX) {
+ std::cout << "ONNX Model" << std::endl;
+ }
+
+ auto det_model = fastdeploy::vision::ocr::DBDetector(
+ det_model_file, det_params_file, det_option, format);
+ auto cls_model = fastdeploy::vision::ocr::Classifier(
+ cls_model_file, cls_params_file, cls_option, format);
+ auto rec_model = fastdeploy::vision::ocr::Recognizer(
+ rec_model_file, rec_params_file, rec_label_file, rec_option, format);
+
+ if (format == fastdeploy::RKNN) {
+ cls_model.GetPreprocessor().DisableNormalize();
+ cls_model.GetPreprocessor().DisablePermute();
+
+ det_model.GetPreprocessor().DisableNormalize();
+ det_model.GetPreprocessor().DisablePermute();
+
+ rec_model.GetPreprocessor().DisableNormalize();
+ rec_model.GetPreprocessor().DisablePermute();
+ }
+ det_model.GetPreprocessor().SetStaticShapeInfer(true);
+ rec_model.GetPreprocessor().SetStaticShapeInfer(true);
+
+ assert(det_model.Initialized());
+ assert(cls_model.Initialized());
+ assert(rec_model.Initialized());
+
+  // The classification model is optional, so the PP-OCR pipeline can also be
+  // built without it:
+  //   auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
+ auto ppocr_v3 =
+ fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
+
+  // When users enable static shape infer for the rec model, the batch size of
+  // the cls and rec models must be set to 1.
+ ppocr_v3.SetClsBatchSize(1);
+ ppocr_v3.SetRecBatchSize(1);
+
+ if (!ppocr_v3.Initialized()) {
+ std::cerr << "Failed to initialize PP-OCR." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::OCRResult result;
+ if (!ppocr_v3.Predict(im, &result)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << result.Str() << std::endl;
+
+ auto vis_im = fastdeploy::vision::VisOcr(im, result);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char *argv[]) {
+ if (argc < 7) {
+ std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model "
+ "path/to/rec_model path/to/rec_label_file path/to/image "
+ "run_option, "
+                 "e.g. ./infer_demo ./ch_PP-OCRv3_det_infer "
+ "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
+ "./ppocr_keys_v1.txt ./12.jpg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+                 "with RKNPU2."
+ << std::endl;
+ return -1;
+ }
+
+ fastdeploy::RuntimeOption option;
+ fastdeploy::ModelFormat format;
+ int flag = std::atoi(argv[6]);
+
+ if (flag == 0) {
+ option.UseCpu();
+ format = fastdeploy::ONNX;
+ } else if (flag == 1) {
+ option.UseRKNPU2();
+ format = fastdeploy::RKNN;
+ }
+
+ std::string det_model_dir = argv[1];
+ std::string cls_model_dir = argv[2];
+ std::string rec_model_dir = argv[3];
+ std::string rec_label_file = argv[4];
+ std::string test_image = argv[5];
+ InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file,
+ test_image, option, format);
+ return 0;
+}
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
new file mode 100755
index 000000000..d281daf83
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
@@ -0,0 +1,49 @@
+English | [简体中文](README_CN.md)
+# PPOCRv3 Python Deployment Example
+
+Two steps before deployment
+
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+
+This directory provides `infer_static_shape.py` to quickly complete the deployment of PPOCRv3 on CPU and RKNPU2. Run the following script:
+
+```
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# Download the example code for deployment
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/ocr/PP-OCRv3/rknpu2/python/
+
+# CPU inference
+python3 infer_static_shape.py \
+ --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ --rec_label_file ./ppocr_keys_v1.txt \
+ --image 12.jpg \
+ --device cpu
+
+# RKNPU2 inference
+python3 infer_static_shape.py \
+ --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+ --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+ --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+ --rec_label_file ppocr_keys_v1.txt \
+ --image 12.jpg \
+ --device npu
+```
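+
+The RKNPU2 path differs from the generic PP-OCRv3 Python example mainly in its preprocessing setup; a condensed sketch of what `infer_static_shape.py` does for the detector:
+
+```python
+import fastdeploy as fd
+
+# Abridged from infer_static_shape.py: for RKNPU2 the models are RKNN format,
+# det/rec preprocessors run static-shape inference, and normalize/permute are
+# disabled host-side (their mean/std live in the tools/rknpu2/config/*.yaml
+# files used at conversion time).
+opt = fd.RuntimeOption()
+opt.use_rknpu2()
+
+det_model = fd.vision.ocr.DBDetector(
+    "./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn",
+    "",
+    runtime_option=opt,
+    model_format=fd.ModelFormat.RKNN)
+det_model.preprocessor.static_shape_infer = True
+det_model.preprocessor.disable_normalize()
+det_model.preprocessor.disable_permute()
+# The cls and rec models get the same treatment, and the cls/rec batch sizes
+# must be 1 when static-shape inference is enabled.
+```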
+
+The visualized result after running is as follows
+
+
+
+
+
+## Other Documents
+
+- [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
+- [PPOCR Model Description](../../)
+- [PPOCRv3 C++ Deployment](../cpp)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
new file mode 100644
index 000000000..663a6b62d
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
@@ -0,0 +1,62 @@
+[English](README.md) | 简体中文
+# PPOCRv3 Python Deployment Example
+
+Before deployment, confirm the following two steps:
+
+- 1. The software and hardware environment meets the requirements; refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. Install the FastDeploy Python whl package; refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+This directory provides `infer_static_shape.py` to quickly complete the deployment of PPOCRv3 on CPU and RKNPU2. Run the following script:
+
+```
+
+# Download the models, test image, and dictionary file
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
+tar xvf ch_PP-OCRv3_det_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
+tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
+
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
+tar xvf ch_PP-OCRv3_rec_infer.tar
+
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/ocr/PP-OCRv3/rknpu2/python/
+
+# CPU inference
+python3 infer_static_shape.py \
+ --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
+ --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
+ --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
+ --rec_label_file ./ppocr_keys_v1.txt \
+ --image 12.jpg \
+ --device cpu
+
+# RKNPU2 inference
+python3 infer_static_shape.py \
+ --det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
+ --cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
+ --rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
+ --rec_label_file ppocr_keys_v1.txt \
+ --image 12.jpg \
+ --device npu
+```
+
+The visualized result after running is shown below:
+
+
+
+
+
+## Other Documents
+
+- [Python API Reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
+- [PPOCR Model Description](../../)
+- [PPOCRv3 C++ Deployment](../cpp)
+- [Model Prediction Results](../../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend](../../../../../../docs/cn/faq/how_to_change_backend.md)
diff --git a/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py b/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
new file mode 100755
index 000000000..7aa138217
--- /dev/null
+++ b/examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
@@ -0,0 +1,144 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--det_model", required=True, help="Path of Detection model of PPOCR.")
+ parser.add_argument(
+ "--cls_model",
+ required=True,
+ help="Path of Classification model of PPOCR.")
+ parser.add_argument(
+ "--rec_model",
+ required=True,
+        help="Path of Recognition model of PPOCR.")
+ parser.add_argument(
+ "--rec_label_file",
+ required=True,
+        help="Path of Recognition label file of PPOCR.")
+ parser.add_argument(
+ "--image", type=str, required=True, help="Path of test image file.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+        help="Type of inference device, support 'cpu' or 'npu'.")
+ parser.add_argument(
+ "--cpu_thread_num",
+ type=int,
+ default=9,
+        help="Number of threads for CPU inference.")
+ return parser.parse_args()
+
+
+def build_option(args):
+
+ det_option = fd.RuntimeOption()
+ cls_option = fd.RuntimeOption()
+ rec_option = fd.RuntimeOption()
+ if args.device == "npu":
+ det_option.use_rknpu2()
+ cls_option.use_rknpu2()
+ rec_option.use_rknpu2()
+
+ return det_option, cls_option, rec_option
+
+
+def build_format(args):
+ det_format = fd.ModelFormat.ONNX
+ cls_format = fd.ModelFormat.ONNX
+ rec_format = fd.ModelFormat.ONNX
+ if args.device == "npu":
+ det_format = fd.ModelFormat.RKNN
+ cls_format = fd.ModelFormat.RKNN
+ rec_format = fd.ModelFormat.RKNN
+
+ return det_format, cls_format, rec_format
+
+
+args = parse_arguments()
+
+# Detection model: detects text boxes
+det_model_file = args.det_model
+det_params_file = ""
+# Classification model: text orientation classification (optional)
+cls_model_file = args.cls_model
+cls_params_file = ""
+# Recognition model: recognizes text from the image
+rec_model_file = args.rec_model
+rec_params_file = ""
+rec_label_file = args.rec_label_file
+
+det_option, cls_option, rec_option = build_option(args)
+det_format, cls_format, rec_format = build_format(args)
+
+det_model = fd.vision.ocr.DBDetector(
+ det_model_file,
+ det_params_file,
+ runtime_option=det_option,
+ model_format=det_format)
+
+cls_model = fd.vision.ocr.Classifier(
+ cls_model_file,
+ cls_params_file,
+ runtime_option=cls_option,
+ model_format=cls_format)
+
+rec_model = fd.vision.ocr.Recognizer(
+ rec_model_file,
+ rec_params_file,
+ rec_label_file,
+ runtime_option=rec_option,
+ model_format=rec_format)
+
+# Enable static-shape inference for the Det and Rec models
+det_model.preprocessor.static_shape_infer = True
+rec_model.preprocessor.static_shape_infer = True
+
+if args.device == "npu":
+ det_model.preprocessor.disable_normalize()
+ det_model.preprocessor.disable_permute()
+ cls_model.preprocessor.disable_normalize()
+ cls_model.preprocessor.disable_permute()
+ rec_model.preprocessor.disable_normalize()
+ rec_model.preprocessor.disable_permute()
+
+# Create PP-OCR, chaining the 3 models; cls_model is optional and can be set to None if not needed
+ppocr_v3 = fd.vision.ocr.PPOCRv3(
+ det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+
+# The batch size of the Cls and Rec models must be set to 1 when static-shape inference is enabled
+ppocr_v3.cls_batch_size = 1
+ppocr_v3.rec_batch_size = 1
+
+# Prepare the image for prediction
+im = cv2.imread(args.image)
+
+# Predict and print the results
+result = ppocr_v3.predict(im)
+
+print(result)
+
+# Visualize the results
+vis_im = fd.vision.vis_ppocr(im, result)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result saved in ./visualized_result.jpg")
diff --git a/fastdeploy/vision/ocr/ppocr/classifier.cc b/fastdeploy/vision/ocr/ppocr/classifier.cc
old mode 100755
new mode 100644
index 55f355db2..b7dcc502b
--- a/fastdeploy/vision/ocr/ppocr/classifier.cc
+++ b/fastdeploy/vision/ocr/ppocr/classifier.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
+
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,15 +27,16 @@ Classifier::Classifier(const std::string& model_file,
const RuntimeOption& custom_option,
const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT,
- Backend::OPENVINO};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
- valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+ valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
+ Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+ valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;
runtime_option.model_format = model_format;
@@ -54,16 +56,18 @@ bool Classifier::Initialize() {
}
std::unique_ptr Classifier::Clone() const {
- std::unique_ptr clone_model = utils::make_unique(Classifier(*this));
+ std::unique_ptr clone_model =
+ utils::make_unique(Classifier(*this));
clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model;
}
-bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score) {
+bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label,
+ float* cls_score) {
std::vector cls_labels(1);
std::vector cls_scores(1);
bool success = BatchPredict({img}, &cls_labels, &cls_scores);
- if(!success){
+ if (!success) {
return success;
}
*cls_label = cls_labels[0];
@@ -72,16 +76,19 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_scor
}
bool Classifier::BatchPredict(const std::vector& images,
- std::vector* cls_labels, std::vector* cls_scores) {
+ std::vector* cls_labels,
+ std::vector* cls_scores) {
return BatchPredict(images, cls_labels, cls_scores, 0, images.size());
}
bool Classifier::BatchPredict(const std::vector& images,
- std::vector* cls_labels, std::vector* cls_scores,
+ std::vector* cls_labels,
+ std::vector* cls_scores,
size_t start_index, size_t end_index) {
size_t total_size = images.size();
std::vector fd_images = WrapMat(images);
- if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index)) {
+ if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
+ end_index)) {
FDERROR << "Failed to preprocess the input image." << std::endl;
return false;
}
@@ -91,13 +98,15 @@ bool Classifier::BatchPredict(const std::vector& images,
return false;
}
- if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, start_index, total_size)) {
- FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl;
+ if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores,
+ start_index, total_size)) {
+ FDERROR << "Failed to postprocess the inference cls_results by runtime."
+ << std::endl;
return false;
}
return true;
}
-} // namesapce ocr
+} // namespace ocr
} // namespace vision
} // namespace fastdeploy
diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
old mode 100755
new mode 100644
index dcd76c168..35f98acc9
--- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
@@ -13,9 +13,10 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h"
+
+#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
-#include "fastdeploy/function/concat.h"
namespace fastdeploy {
namespace vision {
@@ -38,34 +39,43 @@ void OcrClassifierResizeImage(FDMat* mat,
Resize::Run(mat, resize_w, img_h);
}
-bool ClassifierPreprocessor::Run(std::vector* images, std::vector* outputs) {
+bool ClassifierPreprocessor::Run(std::vector* images,
+ std::vector* outputs) {
return Run(images, outputs, 0, images->size());
}
-bool ClassifierPreprocessor::Run(std::vector* images, std::vector* outputs,
+bool ClassifierPreprocessor::Run(std::vector* images,
+ std::vector* outputs,
size_t start_index, size_t end_index) {
-
- if (images->size() == 0 || start_index <0 || end_index <= start_index || end_index > images->size()) {
- FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl;
+ if (images->size() == 0 || start_index < 0 || end_index <= start_index ||
+ end_index > images->size()) {
+ FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
+ "end_index <= images->size()"
+ << std::endl;
return false;
}
for (size_t i = start_index; i < end_index; ++i) {
FDMat* mat = &(images->at(i));
OcrClassifierResizeImage(mat, cls_image_shape_);
- Normalize::Run(mat, mean_, scale_, is_scale_);
+ if (!disable_normalize_) {
+ Normalize::Run(mat, mean_, scale_, is_scale_);
+ }
std::vector value = {0, 0, 0};
if (mat->Width() < cls_image_shape_[2]) {
Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
}
- HWC2CHW::Run(mat);
- Cast::Run(mat, "float");
+
+ if (!disable_permute_) {
+ HWC2CHW::Run(mat);
+ Cast::Run(mat, "float");
+ }
}
// Only have 1 output Tensor.
outputs->resize(1);
// Concat all the preprocessed data to a batch tensor
size_t tensor_size = end_index - start_index;
- std::vector tensors(tensor_size);
+ std::vector tensors(tensor_size);
for (size_t i = 0; i < tensor_size; ++i) {
(*images)[i + start_index].ShareWithTensor(&(tensors[i]));
tensors[i].ExpandDim(0);
diff --git a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
index 52b2bb737..921f3f826 100644
--- a/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/cls_preprocessor.h
@@ -56,7 +56,16 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
/// Get cls_image_shape for the classification preprocess
std::vector GetClsImageShape() const { return cls_image_shape_; }
+ /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+ /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
private:
+ // for recording the switch of hwc2chw
+ bool disable_permute_ = false;
+ // for recording the switch of normalize
+ bool disable_normalize_ = false;
std::vector mean_ = {0.5f, 0.5f, 0.5f};
std::vector scale_ = {0.5f, 0.5f, 0.5f};
bool is_scale_ = true;
diff --git a/fastdeploy/vision/ocr/ppocr/dbdetector.cc b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
index 7dd0ac84a..914b952f2 100644
--- a/fastdeploy/vision/ocr/ppocr/dbdetector.cc
+++ b/fastdeploy/vision/ocr/ppocr/dbdetector.cc
@@ -36,6 +36,7 @@ DBDetector::DBDetector(const std::string& model_file,
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+ valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
index 69687d5cd..06f47b6ef 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.cc
@@ -20,9 +20,13 @@ namespace fastdeploy {
namespace vision {
namespace ocr {
-std::array OcrDetectorGetInfo(FDMat* img, int max_size_len) {
+std::array DBDetectorPreprocessor::OcrDetectorGetInfo(
+ FDMat* img, int max_size_len) {
int w = img->Width();
int h = img->Height();
+ if (static_shape_infer_) {
+ return {w, h, det_image_shape_[2], det_image_shape_[1]};
+ }
float ratio = 1.f;
int max_wh = w >= h ? w : h;
@@ -86,7 +90,10 @@ bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch,
ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
max_resize_w, max_resize_h);
}
- (*normalize_permute_op_)(image_batch);
+
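+  // The fused normalize+permute op runs only when neither step is disabled.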
+ if (!disable_normalize_ && !disable_permute_) {
+ (*normalize_permute_op_)(image_batch);
+ }
outputs->resize(1);
FDTensor* tensor = image_batch->Tensor();
diff --git a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
index fd7b77de1..32ef80011 100644
--- a/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/det_preprocessor.h
@@ -59,14 +59,44 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager {
return &batch_det_img_info_;
}
+ /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+ /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
+ /// Set det_image_shape for the detection preprocess.
+ /// This api is usually used when you retrain the model.
+ /// Generally, you do not need to use it.
+ void SetDetImageShape(const std::vector& det_image_shape) {
+ det_image_shape_ = det_image_shape;
+ }
+  /// Get det_image_shape for the detection preprocess
+ std::vector GetDetImageShape() const { return det_image_shape_; }
+
+  /// Set whether to use static_shape_infer. When deploying PP-OCR
+  /// on hardware that does not support dynamic input shape well,
+  /// such as Huawei Ascend, static_shape_infer needs to be true.
+ void SetStaticShapeInfer(bool static_shape_infer) {
+ static_shape_infer_ = static_shape_infer;
+ }
+  /// Get static_shape_infer of the detection preprocess
+ bool GetStaticShapeInfer() const { return static_shape_infer_; }
+
private:
bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w,
int max_resize_h);
+ // for recording the switch of hwc2chw
+ bool disable_permute_ = false;
+ // for recording the switch of normalize
+ bool disable_normalize_ = false;
int max_side_len_ = 960;
std::vector> batch_det_img_info_;
std::shared_ptr resize_op_;
std::shared_ptr pad_op_;
std::shared_ptr normalize_permute_op_;
+ std::vector det_image_shape_ = {3, 960, 960};
+ bool static_shape_infer_ = false;
+ std::array OcrDetectorGetInfo(FDMat* img, int max_size_len);
};
} // namespace ocr
diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
index aa77542af..a1ebd09c6 100644
--- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
+++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
@@ -26,6 +26,9 @@ void BindPPOCRModel(pybind11::module& m) {
pybind11::class_(
m, "DBDetectorPreprocessor")
.def(pybind11::init<>())
+ .def_property("static_shape_infer",
+ &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer,
+ &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer)
.def_property("max_side_len",
&vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
&vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
@@ -33,19 +36,27 @@ void BindPPOCRModel(pybind11::module& m) {
[](vision::ocr::DBDetectorPreprocessor& self,
const std::vector& mean, const std::vector& std,
bool is_scale) { self.SetNormalize(mean, std, is_scale); })
- .def("run", [](vision::ocr::DBDetectorPreprocessor& self,
- std::vector& im_list) {
- std::vector images;
- for (size_t i = 0; i < im_list.size(); ++i) {
- images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
- }
- std::vector outputs;
- self.Run(&images, &outputs);
- auto batch_det_img_info = self.GetBatchImgInfo();
- for (size_t i = 0; i < outputs.size(); ++i) {
- outputs[i].StopSharing();
- }
- return std::make_pair(outputs, *batch_det_img_info);
+ .def("run",
+ [](vision::ocr::DBDetectorPreprocessor& self,
+ std::vector& im_list) {
+ std::vector images;
+ for (size_t i = 0; i < im_list.size(); ++i) {
+ images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+ }
+ std::vector outputs;
+ self.Run(&images, &outputs);
+ auto batch_det_img_info = self.GetBatchImgInfo();
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ outputs[i].StopSharing();
+ }
+ return std::make_pair(outputs, *batch_det_img_info);
+ })
+ .def("disable_normalize",
+ [](vision::ocr::DBDetectorPreprocessor& self) {
+ self.DisableNormalize();
+ })
+ .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) {
+ self.DisablePermute();
});
pybind11::class_(
@@ -135,21 +146,30 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("is_scale",
&vision::ocr::ClassifierPreprocessor::GetIsScale,
&vision::ocr::ClassifierPreprocessor::SetIsScale)
- .def("run", [](vision::ocr::ClassifierPreprocessor& self,
- std::vector& im_list) {
- std::vector images;
- for (size_t i = 0; i < im_list.size(); ++i) {
- images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
- }
- std::vector outputs;
- if (!self.Run(&images, &outputs)) {
- throw std::runtime_error(
- "Failed to preprocess the input data in ClassifierPreprocessor.");
- }
- for (size_t i = 0; i < outputs.size(); ++i) {
- outputs[i].StopSharing();
- }
- return outputs;
+ .def("run",
+ [](vision::ocr::ClassifierPreprocessor& self,
+ std::vector& im_list) {
+ std::vector images;
+ for (size_t i = 0; i < im_list.size(); ++i) {
+ images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+ }
+ std::vector outputs;
+ if (!self.Run(&images, &outputs)) {
+ throw std::runtime_error(
+ "Failed to preprocess the input data in "
+ "ClassifierPreprocessor.");
+ }
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ outputs[i].StopSharing();
+ }
+ return outputs;
+ })
+ .def("disable_normalize",
+ [](vision::ocr::ClassifierPreprocessor& self) {
+ self.DisableNormalize();
+ })
+ .def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) {
+ self.DisablePermute();
});
pybind11::class_(
@@ -229,21 +249,30 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("is_scale",
&vision::ocr::RecognizerPreprocessor::GetIsScale,
&vision::ocr::RecognizerPreprocessor::SetIsScale)
- .def("run", [](vision::ocr::RecognizerPreprocessor& self,
- std::vector& im_list) {
- std::vector images;
- for (size_t i = 0; i < im_list.size(); ++i) {
- images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
- }
- std::vector outputs;
- if (!self.Run(&images, &outputs)) {
- throw std::runtime_error(
- "Failed to preprocess the input data in RecognizerPreprocessor.");
- }
- for (size_t i = 0; i < outputs.size(); ++i) {
- outputs[i].StopSharing();
- }
- return outputs;
+ .def("run",
+ [](vision::ocr::RecognizerPreprocessor& self,
+ std::vector& im_list) {
+ std::vector images;
+ for (size_t i = 0; i < im_list.size(); ++i) {
+ images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
+ }
+ std::vector outputs;
+ if (!self.Run(&images, &outputs)) {
+ throw std::runtime_error(
+ "Failed to preprocess the input data in "
+ "RecognizerPreprocessor.");
+ }
+ for (size_t i = 0; i < outputs.size(); ++i) {
+ outputs[i].StopSharing();
+ }
+ return outputs;
+ })
+ .def("disable_normalize",
+ [](vision::ocr::RecognizerPreprocessor& self) {
+ self.DisableNormalize();
+ })
+ .def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) {
+ self.DisablePermute();
});
pybind11::class_(
diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc
index ad049fdce..59c7de279 100644
--- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc
+++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.cc
@@ -13,22 +13,23 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h"
+
+#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
-#include "fastdeploy/function/concat.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
- const std::vector& rec_image_shape, bool static_shape_infer) {
+ const std::vector& rec_image_shape,
+ bool static_shape_infer) {
int img_h, img_w;
img_h = rec_image_shape[1];
img_w = rec_image_shape[2];
if (!static_shape_infer) {
-
img_w = int(img_h * max_wh_ratio);
float ratio = float(mat->Width()) / float(mat->Height());
@@ -43,23 +44,29 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
} else {
if (mat->Width() >= img_w) {
- Resize::Run(mat, img_w, img_h); // Reszie W to 320
+      Resize::Run(mat, img_w, img_h);  // Resize W to 320
} else {
Resize::Run(mat, mat->Width(), img_h);
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
// Pad to 320
- }
+ }
}
}
-bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs) {
+bool RecognizerPreprocessor::Run(std::vector* images,
+ std::vector* outputs) {
return Run(images, outputs, 0, images->size(), {});
}
-bool RecognizerPreprocessor::Run(std::vector* images, std::vector* outputs,
- size_t start_index, size_t end_index, const std::vector& indices) {
- if (images->size() == 0 || end_index <= start_index || end_index > images->size()) {
- FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl;
+bool RecognizerPreprocessor::Run(std::vector* images,
+ std::vector* outputs,
+ size_t start_index, size_t end_index,
+ const std::vector& indices) {
+ if (images->size() == 0 || end_index <= start_index ||
+ end_index > images->size()) {
+ FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
+ "end_index <= images->size()"
+ << std::endl;
return false;
}
@@ -67,7 +74,7 @@ bool RecognizerPreprocessor::Run(std::vector* images, std::vector* images, std::vectorat(real_index));
- OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_infer_);
- NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
+ OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_,
+ static_shape_infer_);
+ if (!disable_normalize_ && !disable_permute_) {
+ NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
+ } else {
+ if (!disable_normalize_) {
+ Normalize::Run(mat, mean_, scale_, is_scale_);
+ }
+ if (!disable_permute_) {
+ HWC2CHW::Run(mat);
+ Cast::Run(mat, "float");
+ }
+ }
}
// Only have 1 output Tensor.
outputs->resize(1);
- size_t tensor_size = end_index-start_index;
+ size_t tensor_size = end_index - start_index;
// Concat all the preprocessed data to a batch tensor
- std::vector tensors(tensor_size);
+ std::vector tensors(tensor_size);
for (size_t i = 0; i < tensor_size; ++i) {
size_t real_index = i + start_index;
if (indices.size() != 0) {
real_index = indices[i + start_index];
}
-
+
(*images)[real_index].ShareWithTensor(&(tensors[i]));
tensors[i].ExpandDim(0);
}
diff --git a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
index f7d741b5d..c5edb2a80 100644
--- a/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
+++ b/fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
@@ -66,7 +66,16 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
/// Get rec_image_shape for the recognition preprocess
std::vector GetRecImageShape() { return rec_image_shape_; }
+ /// This function will disable normalize in preprocessing step.
+  void DisableNormalize() { disable_normalize_ = true; }
+ /// This function will disable hwc2chw in preprocessing step.
+  void DisablePermute() { disable_permute_ = true; }
+
private:
+ // for recording the switch of hwc2chw
+ bool disable_permute_ = false;
+ // for recording the switch of normalize
+ bool disable_normalize_ = false;
std::vector rec_image_shape_ = {3, 48, 320};
std::vector mean_ = {0.5f, 0.5f, 0.5f};
std::vector scale_ = {0.5f, 0.5f, 0.5f};
diff --git a/fastdeploy/vision/ocr/ppocr/recognizer.cc b/fastdeploy/vision/ocr/ppocr/recognizer.cc
old mode 100755
new mode 100644
index 69e75b281..74a8a26a1
--- a/fastdeploy/vision/ocr/ppocr/recognizer.cc
+++ b/fastdeploy/vision/ocr/ppocr/recognizer.cc
@@ -13,6 +13,7 @@
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
+
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,17 +27,19 @@ Recognizer::Recognizer(const std::string& model_file,
const std::string& params_file,
const std::string& label_path,
const RuntimeOption& custom_option,
- const ModelFormat& model_format):postprocessor_(label_path) {
+ const ModelFormat& model_format)
+ : postprocessor_(label_path) {
if (model_format == ModelFormat::ONNX) {
- valid_cpu_backends = {Backend::ORT,
- Backend::OPENVINO};
- valid_gpu_backends = {Backend::ORT, Backend::TRT};
+ valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
+ valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
- valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
+ valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
+ Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE};
- valid_ascend_backends = {Backend::LITE};
+ valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
+ valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;
@@ -57,12 +60,14 @@ bool Recognizer::Initialize() {
}
std::unique_ptr Recognizer::Clone() const {
- std::unique_ptr clone_model = utils::make_unique(Recognizer(*this));
+ std::unique_ptr clone_model =
+ utils::make_unique(Recognizer(*this));
clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model;
}
-bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score) {
+bool Recognizer::Predict(const cv::Mat& img, std::string* text,
+ float* rec_score) {
std::vector texts(1);
std::vector rec_scores(1);
bool success = BatchPredict({img}, &texts, &rec_scores);
@@ -75,20 +80,24 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score
}
bool Recognizer::BatchPredict(const std::vector& images,
- std::vector* texts, std::vector* rec_scores) {
+ std::vector* texts,
+ std::vector* rec_scores) {
return BatchPredict(images, texts, rec_scores, 0, images.size(), {});
}
bool Recognizer::BatchPredict(const std::vector& images,
- std::vector* texts, std::vector* rec_scores,
- size_t start_index, size_t end_index, const std::vector& indices) {
+ std::vector* texts,
+ std::vector* rec_scores,
+ size_t start_index, size_t end_index,
+ const std::vector& indices) {
size_t total_size = images.size();
if (indices.size() != 0 && indices.size() != total_size) {
FDERROR << "indices.size() should be 0 or images.size()." << std::endl;
return false;
}
std::vector fd_images = WrapMat(images);
- if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index, indices)) {
+ if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
+ end_index, indices)) {
FDERROR << "Failed to preprocess the input image." << std::endl;
return false;
}
@@ -99,13 +108,15 @@ bool Recognizer::BatchPredict(const std::vector& images,
return false;
}
- if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, start_index, total_size, indices)) {
- FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl;
+ if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores,
+ start_index, total_size, indices)) {
+    FDERROR << "Failed to postprocess the inference results by runtime."
+ << std::endl;
return false;
}
return true;
}
-} // namesapce ocr
+} // namespace ocr
} // namespace vision
} // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/vision/visualize/ocr.cc b/fastdeploy/vision/visualize/ocr.cc
index 4946e08e7..5d0bb9e1b 100644
--- a/fastdeploy/vision/visualize/ocr.cc
+++ b/fastdeploy/vision/visualize/ocr.cc
@@ -17,10 +17,14 @@
namespace fastdeploy {
namespace vision {
-cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
+cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
+ const float score_threshold) {
auto vis_im = im.clone();
for (int n = 0; n < ocr_result.boxes.size(); n++) {
+    if (n < ocr_result.rec_scores.size() &&
+        ocr_result.rec_scores[n] < score_threshold) {
+      continue;
+    }
cv::Point rook_points[4];
for (int m = 0; m < 4; m++) {
@@ -28,7 +32,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
int(ocr_result.boxes[n][m * 2 + 1]));
}
- const cv::Point *ppt[1] = {rook_points};
+ const cv::Point* ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
@@ -36,7 +40,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
return vis_im;
}
-cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
+cv::Mat Visualize::VisOcr(const cv::Mat& im, const OCRResult& ocr_result) {
FDWARNING
<< "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, "
"please use fastdeploy::vision:VisOcr function instead."
@@ -51,7 +55,7 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
int(ocr_result.boxes[n][m * 2 + 1]));
}
- const cv::Point *ppt[1] = {rook_points};
+ const cv::Point* ppt[1] = {rook_points};
int npt[] = {4};
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
}
diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h
index 4614dc4e1..f382818c6 100755
--- a/fastdeploy/vision/visualize/visualize.h
+++ b/fastdeploy/vision/visualize/visualize.h
@@ -15,8 +15,8 @@
#pragma once
#include "fastdeploy/vision/common/result.h"
-#include "opencv2/imgproc/imgproc.hpp"
#include "fastdeploy/vision/tracking/pptracking/model.h"
+#include "opencv2/imgproc/imgproc.hpp"
namespace fastdeploy {
/** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace
@@ -41,9 +41,10 @@ class FASTDEPLOY_DECL Visualize {
bool remove_small_connected_area = false);
static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
float threshold);
- static cv::Mat SwapBackgroundMatting(
- const cv::Mat& im, const cv::Mat& background, const MattingResult& result,
- bool remove_small_connected_area = false);
+ static cv::Mat
+ SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background,
+ const MattingResult& result,
+ bool remove_small_connected_area = false);
static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im,
const cv::Mat& background,
int background_label,
@@ -90,9 +91,11 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
* \param[in] font_size font size
* \return cv::Mat type stores the visualized results
*/
-FASTDEPLOY_DECL cv::Mat VisClassification(
- const cv::Mat& im, const ClassifyResult& result, int top_k = 5,
- float score_threshold = 0.0f, float font_size = 0.5f);
+FASTDEPLOY_DECL cv::Mat VisClassification(const cv::Mat& im,
+ const ClassifyResult& result,
+ int top_k = 5,
+ float score_threshold = 0.0f,
+ float font_size = 0.5f);
/** \brief Show the visualized results with custom labels for classification models
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -103,10 +106,10 @@ FASTDEPLOY_DECL cv::Mat VisClassification(
* \param[in] font_size font size
* \return cv::Mat type stores the visualized results
*/
-FASTDEPLOY_DECL cv::Mat VisClassification(
- const cv::Mat& im, const ClassifyResult& result,
- const std::vector& labels, int top_k = 5,
- float score_threshold = 0.0f, float font_size = 0.5f);
+FASTDEPLOY_DECL cv::Mat
+VisClassification(const cv::Mat& im, const ClassifyResult& result,
+ const std::vector& labels, int top_k = 5,
+ float score_threshold = 0.0f, float font_size = 0.5f);
/** \brief Show the visualized results for face detection models
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -159,7 +162,8 @@ FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im,
* \param[in] result the result produced by model
* \return cv::Mat type stores the visualized results
*/
-FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result);
+FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
+ const float score_threshold = 0);
FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
float score_threshold = 0.0f,
@@ -172,10 +176,10 @@ FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
* \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas
* \return cv::Mat type stores the visualized results
*/
-FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
- const cv::Mat& background,
- const MattingResult& result,
- bool remove_small_connected_area = false);
+FASTDEPLOY_DECL cv::Mat
+SwapBackground(const cv::Mat& im, const cv::Mat& background,
+ const MattingResult& result,
+ bool remove_small_connected_area = false);
/** \brief Swap the image background with SegmentationResult
*
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -196,12 +200,11 @@ FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
* \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold
* \return cv::Mat type stores the visualized results
*/
-FASTDEPLOY_DECL cv::Mat VisKeypointDetection(const cv::Mat& im,
- const KeyPointDetectionResult& results,
- float conf_threshold = 0.5f);
+FASTDEPLOY_DECL cv::Mat
+VisKeypointDetection(const cv::Mat& im, const KeyPointDetectionResult& results,
+ float conf_threshold = 0.5f);
FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
- const HeadPoseResult& result,
- int size = 50,
+ const HeadPoseResult& result, int size = 50,
int line_size = 1);
} // namespace vision
diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py
index 403f70aa7..1fa39600b 100755
--- a/python/fastdeploy/vision/ocr/ppocr/__init__.py
+++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py
@@ -65,6 +65,29 @@ class DBDetectorPreprocessor:
"""
self._preprocessor.set_normalize(mean, std, is_scale)
+ @property
+ def static_shape_infer(self):
+ return self._preprocessor.static_shape_infer
+
+ @static_shape_infer.setter
+ def static_shape_infer(self, value):
+ assert isinstance(
+ value,
+ bool), "The value to set `static_shape_infer` must be type of bool."
+ self._preprocessor.static_shape_infer = value
+
+ def disable_normalize(self):
+ """
+ This function will disable normalize in preprocessing step.
+ """
+ self._preprocessor.disable_normalize()
+
+ def disable_permute(self):
+ """
+ This function will disable hwc2chw in preprocessing step.
+ """
+ self._preprocessor.disable_permute()
+
class DBDetectorPostprocessor:
def __init__(self):
@@ -358,6 +381,18 @@ class ClassifierPreprocessor:
list), "The value to set `cls_image_shape` must be type of list."
self._preprocessor.cls_image_shape = value
+ def disable_normalize(self):
+ """
+ This function will disable normalize in preprocessing step.
+ """
+ self._preprocessor.disable_normalize()
+
+ def disable_permute(self):
+ """
+ This function will disable hwc2chw in preprocessing step.
+ """
+ self._preprocessor.disable_permute()
+
class ClassifierPostprocessor:
def __init__(self):
@@ -581,6 +616,18 @@ class RecognizerPreprocessor:
list), "The value to set `rec_image_shape` must be type of list."
self._preprocessor.rec_image_shape = value
+ def disable_normalize(self):
+ """
+ This function will disable normalize in preprocessing step.
+ """
+ self._preprocessor.disable_normalize()
+
+ def disable_permute(self):
+ """
+ This function will disable hwc2chw in preprocessing step.
+ """
+ self._preprocessor.disable_permute()
+
class RecognizerPostprocessor:
def __init__(self, label_path):
diff --git a/tools/rknpu2/config/ppocrv3_cls.yaml b/tools/rknpu2/config/ppocrv3_cls.yaml
new file mode 100644
index 000000000..197becc2f
--- /dev/null
+++ b/tools/rknpu2/config/ppocrv3_cls.yaml
@@ -0,0 +1,15 @@
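+# mean/std are on the 0-255 scale: the RKNPU2 demos disable host-side
+# normalize/permute, so normalization is folded into the RKNN model here
+# (127.5 = 0.5 * 255, matching the cls preprocessor's mean/scale of 0.5).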
+mean:
+ -
+ - 127.5
+ - 127.5
+ - 127.5
+std:
+ -
+ - 127.5
+ - 127.5
+ - 127.5
+model_path: ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx
+outputs_nodes:
+do_quantization: False
+dataset:
+output_folder: "./ch_ppocr_mobile_v2.0_cls_infer"
diff --git a/tools/rknpu2/config/ppocrv3_det.yaml b/tools/rknpu2/config/ppocrv3_det.yaml
new file mode 100644
index 000000000..2897c5f74
--- /dev/null
+++ b/tools/rknpu2/config/ppocrv3_det.yaml
@@ -0,0 +1,15 @@
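+# mean/std are on the 0-255 scale (ImageNet statistics * 255); host-side
+# normalization is disabled for RKNPU2 and folded into the RKNN model.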
+mean:
+ -
+ - 123.675
+ - 116.28
+ - 103.53
+std:
+ -
+ - 58.395
+ - 57.12
+ - 57.375
+model_path: ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx
+outputs_nodes:
+do_quantization: False
+dataset:
+output_folder: "./ch_PP-OCRv3_det_infer"
diff --git a/tools/rknpu2/config/ppocrv3_rec.yaml b/tools/rknpu2/config/ppocrv3_rec.yaml
new file mode 100644
index 000000000..8a22a39a2
--- /dev/null
+++ b/tools/rknpu2/config/ppocrv3_rec.yaml
@@ -0,0 +1,15 @@
+mean:
+ -
+ - 127.5
+ - 127.5
+ - 127.5
+std:
+ -
+ - 127.5
+ - 127.5
+ - 127.5
+model_path: ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx
+outputs_nodes:
+do_quantization: False
+dataset:
+output_folder: "./ch_PP-OCRv3_rec_infer"
diff --git a/tools/rknpu2/export.py b/tools/rknpu2/export.py
index c42a1eade..a94b34885 100644
--- a/tools/rknpu2/export.py
+++ b/tools/rknpu2/export.py
@@ -65,7 +65,10 @@ if __name__ == "__main__":
if not os.path.exists(yaml_config["output_folder"]):
os.mkdir(yaml_config["output_folder"])
- model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0]
+ name_list = os.path.basename(yaml_config["model_path"]).split(".")
+    model_base_name = "".join(name_list[:-1])
model_device_name = config.target_platform.lower()
if yaml_config["do_quantization"]:
model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn"