mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Hackathon 182 Model] Update PPOCRV3 For RKNPU2 (#1403)
* update ppocrv3 for rknpu2 * add config * add config * detele unuseful * update useful results * Repair note * Repair note * fixed bugs * update
This commit is contained in:
@@ -131,9 +131,9 @@ endif()
|
|||||||
|
|
||||||
if(ENABLE_RKNPU2_BACKEND)
|
if(ENABLE_RKNPU2_BACKEND)
|
||||||
if(RKNN2_TARGET_SOC STREQUAL "RK356X")
|
if(RKNN2_TARGET_SOC STREQUAL "RK356X")
|
||||||
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
|
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
|
||||||
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
|
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
|
||||||
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
|
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
|
||||||
else ()
|
else ()
|
||||||
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588")
|
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588")
|
||||||
endif()
|
endif()
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
# get RKNPU2_URL
|
# get RKNPU2_URL
|
||||||
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
||||||
set(RKNPU2_VERSION "1.4.0")
|
set(RKNPU2_VERSION "1.4.2b0")
|
||||||
set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
|
set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz")
|
||||||
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
|
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
|
||||||
|
|
||||||
# download_and_decompress
|
# download_and_decompress
|
||||||
@@ -10,11 +10,12 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE}
|
|||||||
# set path
|
# set path
|
||||||
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
|
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
|
||||||
|
|
||||||
|
# include lib
|
||||||
if (EXISTS ${RKNPU_RUNTIME_PATH})
|
if (EXISTS ${RKNPU_RUNTIME_PATH})
|
||||||
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
|
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so)
|
||||||
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
|
include_directories(${RKNPU_RUNTIME_PATH}/include)
|
||||||
else ()
|
else ()
|
||||||
message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error")
|
message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
||||||
|
77
examples/vision/ocr/PP-OCRv3/rknpu2/README.md
Normal file
77
examples/vision/ocr/PP-OCRv3/rknpu2/README.md
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
# PaddleOCR 模型部署
|
||||||
|
|
||||||
|
## PaddleOCR为多个模型组合串联任务,包含如下几个模型构成
|
||||||
|
|
||||||
|
* 文本检测 `DBDetector`
|
||||||
|
* [可选]方向分类 `Classifier` 用于调整进入文字识别前的图像方向
|
||||||
|
* 文字识别 `Recognizer` 用于从图像中识别出文字
|
||||||
|
|
||||||
|
根据不同场景, FastDeploy汇总提供如下OCR任务部署, 用户需同时下载3个模型与字典文件(或2个,分类器可选), 完成OCR整个预测流程
|
||||||
|
|
||||||
|
## PP-OCR 中英文系列模型
|
||||||
|
|
||||||
|
下表中的模型下载链接由PaddleOCR模型库提供, 详见[PP-OCR系列模型列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md)
|
||||||
|
|
||||||
|
| OCR版本 | 文本框检测 | 方向分类模型 | 文字识别 | 字典文件 | 说明 |
|
||||||
|
|:-------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:--------------------------------------------------------|
|
||||||
|
| ch_PP-OCRv3[推荐] | [ch_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv3系列原始超轻量模型,支持中英文、多语种文本检测 |
|
||||||
|
| en_PP-OCRv3[推荐] | [en_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [en_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) | [en_dict.txt](https://bj.bcebos.com/paddlehub/fastdeploy/en_dict.txt) | OCRv3系列原始超轻量模型,支持英文与数字识别,除检测模型和识别模型的训练数据与中文模型不同以外,无其他区别 |
|
||||||
|
| ch_PP-OCRv2 | [ch_PP-OCRv2_det](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv2_rec](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型,支持中英文、多语种文本检测 |
|
||||||
|
| ch_PP-OCRv2_mobile | [ch_ppocr_mobile_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_mobile_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型,支持中英文、多语种文本检测,比PPOCRv2更加轻量 |
|
||||||
|
| ch_PP-OCRv2_server | [ch_ppocr_server_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_server_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2服务器系列模型, 支持中英文、多语种文本检测,比超轻量模型更大,但效果更好 |
|
||||||
|
|
||||||
|
## 模型转换
|
||||||
|
|
||||||
|
在RKNPU2上使用PPOCR时,我们需要把Paddle静态图模型转为RKNN模型。
|
||||||
|
|
||||||
|
### 静态图模型转RKNN格式模型
|
||||||
|
|
||||||
|
rknn_toolkit2工具暂不支持从Paddle静态图模型直接转换为RKNN模型,因此我们需要先将Paddle静态图模型转为ONNX模型,再将ONNX模型转为RKNN模型。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 下载模型和字典文件
|
||||||
|
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
|
||||||
|
tar -xvf ch_PP-OCRv3_det_infer.tar
|
||||||
|
|
||||||
|
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||||
|
tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||||
|
|
||||||
|
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
||||||
|
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
||||||
|
|
||||||
|
# 转换模型到ONNX格式的模型
|
||||||
|
paddle2onnx --model_dir ch_PP-OCRv3_det_infer \
|
||||||
|
--model_filename inference.pdmodel \
|
||||||
|
--params_filename inference.pdiparams \
|
||||||
|
--save_file ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
--enable_dev_version True
|
||||||
|
paddle2onnx --model_dir ch_ppocr_mobile_v2.0_cls_infer \
|
||||||
|
--model_filename inference.pdmodel \
|
||||||
|
--params_filename inference.pdiparams \
|
||||||
|
--save_file ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
--enable_dev_version True
|
||||||
|
paddle2onnx --model_dir ch_PP-OCRv3_rec_infer \
|
||||||
|
--model_filename inference.pdmodel \
|
||||||
|
--params_filename inference.pdiparams \
|
||||||
|
--save_file ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
--enable_dev_version True
|
||||||
|
|
||||||
|
# 固定模型的输入shape
|
||||||
|
python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
--output_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
--input_shape_dict "{'x':[1,3,960,960]}"
|
||||||
|
python -m paddle2onnx.optimize --input_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
--output_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
--input_shape_dict "{'x':[1,3,48,192]}"
|
||||||
|
python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
--output_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
--input_shape_dict "{'x':[1,3,48,320]}"
|
||||||
|
|
||||||
|
# 转换ONNX模型到RKNN模型
|
||||||
|
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_det.yaml \
|
||||||
|
--target_platform rk3588
|
||||||
|
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_rec.yaml \
|
||||||
|
--target_platform rk3588
|
||||||
|
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_cls.yaml \
|
||||||
|
--target_platform rk3588
|
||||||
|
```
|
14
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
Normal file
14
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/CMakeLists.txt
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# Build script for the PP-OCRv3 RKNPU2 static-shape C++ demo.
#
# Example:
#   cmake .. -DFASTDEPLOY_INSTALL_DIR=/path/to/fastdeploy-sdk

# The version requirement must come before project() so that policy defaults
# are established before any language is enabled.
cmake_minimum_required(VERSION 3.10)
project(infer_demo C CXX)

# Path of the downloaded and extracted FastDeploy SDK.
# This is a path, not a boolean, so it is declared as a PATH cache entry
# instead of option(); a value given with -DFASTDEPLOY_INSTALL_DIR=... is kept.
set(FASTDEPLOY_INSTALL_DIR "" CACHE PATH "Path of downloaded fastdeploy sdk.")
if(NOT FASTDEPLOY_INSTALL_DIR)
  message(FATAL_ERROR
    "FASTDEPLOY_INSTALL_DIR is not set. "
    "Pass -DFASTDEPLOY_INSTALL_DIR=<path of the FastDeploy SDK>.")
endif()

include("${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake")

add_executable(infer_static_shape_demo
  ${PROJECT_SOURCE_DIR}/infer_static_shape.cc)

# Add the FastDeploy header search paths to the demo target.
target_include_directories(infer_static_shape_demo PRIVATE ${FASTDEPLOY_INCS})

# Link the demo against the FastDeploy libraries.
target_link_libraries(infer_static_shape_demo PRIVATE ${FASTDEPLOY_LIBS})
|
55
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
Executable file
55
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README.md
Executable file
@@ -0,0 +1,55 @@
|
|||||||
|
English | [简体中文](README_CN.md)
|
||||||
|
# PPOCRv3 C++ Deployment Example
|
||||||
|
|
||||||
|
This directory provides `infer_static_shape.cc`, an example that quickly deploys PPOCRv3 on CPU and on RKNPU2.
|
||||||
|
|
||||||
|
Two steps before deployment
|
||||||
|
|
||||||
|
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
|
||||||
|
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
|
||||||
|
|
||||||
|
Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model.
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
|
||||||
|
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
|
||||||
|
make -j
|
||||||
|
|
||||||
|
|
||||||
|
# Download model, image, and dictionary files
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||||
|
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||||
|
|
||||||
|
# CPU inference
|
||||||
|
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
./ppocr_keys_v1.txt \
|
||||||
|
./12.jpg \
|
||||||
|
0
|
||||||
|
# RKNPU inference
|
||||||
|
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
|
||||||
|
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
|
||||||
|
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
|
||||||
|
./ppocr_keys_v1.txt \
|
||||||
|
./12.jpg \
|
||||||
|
1
|
||||||
|
```
|
||||||
|
|
||||||
|
The above command works for Linux or MacOS. For SDK in Windows, refer to:
|
||||||
|
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
|
||||||
|
|
||||||
|
The visualized result after running is as follows
|
||||||
|
|
||||||
|
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
|
||||||
|
|
||||||
|
## Other Documents
|
||||||
|
|
||||||
|
- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
|
||||||
|
- [PPOCR Model Description](../../)
|
||||||
|
- [PPOCRv3 Python Deployment](../python)
|
||||||
|
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
|
||||||
|
- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)
|
63
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
Normal file
63
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/README_CN.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
[English](README.md) | 简体中文
|
||||||
|
# PPOCRv3 C++部署示例
|
||||||
|
|
||||||
|
本目录下提供`infer_static_shape.cc`快速完成PPOCRv3在CPU以及RKNPU2上部署的示例。
|
||||||
|
|
||||||
|
在部署前,需确认你已经成功完成以下两个操作:
|
||||||
|
|
||||||
|
* [正确编译FastDeploy SDK](../../../../../../docs/cn/faq/rknpu2/build.md).
|
||||||
|
* [成功转换模型](../README.md).
|
||||||
|
|
||||||
|
在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.3以上(x.x.x>1.0.3), RKNN版本在1.4.1b22以上。
|
||||||
|
|
||||||
|
```
|
||||||
|
mkdir build
|
||||||
|
cd build
|
||||||
|
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
|
||||||
|
make -j
|
||||||
|
|
||||||
|
# 下载图片和字典文件
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||||
|
|
||||||
|
|
||||||
|
# 拷贝RKNN模型到build目录
|
||||||
|
|
||||||
|
# CPU推理
|
||||||
|
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
./ppocr_keys_v1.txt \
|
||||||
|
./12.jpg \
|
||||||
|
0
|
||||||
|
# RKNPU推理
|
||||||
|
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
|
||||||
|
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
|
||||||
|
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
|
||||||
|
./ppocr_keys_v1.txt \
|
||||||
|
./12.jpg \
|
||||||
|
1
|
||||||
|
```
|
||||||
|
|
||||||
|
运行完成可视化结果如下图所示:
|
||||||
|
|
||||||
|
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
|
||||||
|
|
||||||
|
结果输出如下:
|
||||||
|
|
||||||
|
```text
|
||||||
|
det boxes: [[276,174],[285,173],[285,178],[276,179]]rec text: rec score:0.000000 cls label: 1 cls score: 0.766602
|
||||||
|
det boxes: [[43,408],[483,390],[483,431],[44,449]]rec text: 上海斯格威铂尔曼大酒店 rec score:0.888450 cls label: 0 cls score: 1.000000
|
||||||
|
det boxes: [[186,456],[399,448],[399,480],[186,488]]rec text: 打浦路15号 rec score:0.988769 cls label: 0 cls score: 1.000000
|
||||||
|
det boxes: [[18,501],[513,485],[514,537],[18,554]]rec text: 绿洲仕格维花园公寓 rec score:0.992730 cls label: 0 cls score: 1.000000
|
||||||
|
det boxes: [[78,553],[404,541],[404,573],[78,585]]rec text: 打浦路252935号 rec score:0.983545 cls label: 0 cls score: 1.000000
|
||||||
|
Visualized result saved in ./vis_result.jpg
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## 其它文档
|
||||||
|
|
||||||
|
- [C++ API查阅](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
|
||||||
|
- [PPOCR 系列模型介绍](../../../README_CN.md)
|
||||||
|
- [PPOCRv3 Python部署](../python)
|
||||||
|
- [模型预测结果说明](../../../../../../docs/api/vision_results/)
|
126
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
Normal file
126
examples/vision/ocr/PP-OCRv3/rknpu2/cpp/infer_static_shape.cc
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision.h"
|
||||||
|
|
||||||
|
void InitAndInfer(const std::string &det_model_file,
|
||||||
|
const std::string &cls_model_file,
|
||||||
|
const std::string &rec_model_file,
|
||||||
|
const std::string &rec_label_file,
|
||||||
|
const std::string &image_file,
|
||||||
|
const fastdeploy::RuntimeOption &option,
|
||||||
|
const fastdeploy::ModelFormat &format) {
|
||||||
|
auto det_params_file = "";
|
||||||
|
auto cls_params_file = "";
|
||||||
|
auto rec_params_file = "";
|
||||||
|
|
||||||
|
auto det_option = option;
|
||||||
|
auto cls_option = option;
|
||||||
|
auto rec_option = option;
|
||||||
|
|
||||||
|
if (format == fastdeploy::ONNX) {
|
||||||
|
std::cout << "ONNX Model" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto det_model = fastdeploy::vision::ocr::DBDetector(
|
||||||
|
det_model_file, det_params_file, det_option, format);
|
||||||
|
auto cls_model = fastdeploy::vision::ocr::Classifier(
|
||||||
|
cls_model_file, cls_params_file, cls_option, format);
|
||||||
|
auto rec_model = fastdeploy::vision::ocr::Recognizer(
|
||||||
|
rec_model_file, rec_params_file, rec_label_file, rec_option, format);
|
||||||
|
|
||||||
|
if (format == fastdeploy::RKNN) {
|
||||||
|
cls_model.GetPreprocessor().DisableNormalize();
|
||||||
|
cls_model.GetPreprocessor().DisablePermute();
|
||||||
|
|
||||||
|
det_model.GetPreprocessor().DisableNormalize();
|
||||||
|
det_model.GetPreprocessor().DisablePermute();
|
||||||
|
|
||||||
|
rec_model.GetPreprocessor().DisableNormalize();
|
||||||
|
rec_model.GetPreprocessor().DisablePermute();
|
||||||
|
}
|
||||||
|
det_model.GetPreprocessor().SetStaticShapeInfer(true);
|
||||||
|
rec_model.GetPreprocessor().SetStaticShapeInfer(true);
|
||||||
|
|
||||||
|
assert(det_model.Initialized());
|
||||||
|
assert(cls_model.Initialized());
|
||||||
|
assert(rec_model.Initialized());
|
||||||
|
|
||||||
|
// The classification model is optional, so the PP-OCR can also be connected
|
||||||
|
// in series as follows auto ppocr_v3 =
|
||||||
|
// fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
|
||||||
|
auto ppocr_v3 =
|
||||||
|
fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
|
||||||
|
|
||||||
|
// When users enable static shape infer for rec model, the batch size of cls
|
||||||
|
// and rec model must to be set to 1.
|
||||||
|
ppocr_v3.SetClsBatchSize(1);
|
||||||
|
ppocr_v3.SetRecBatchSize(1);
|
||||||
|
|
||||||
|
if (!ppocr_v3.Initialized()) {
|
||||||
|
std::cerr << "Failed to initialize PP-OCR." << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto im = cv::imread(image_file);
|
||||||
|
|
||||||
|
fastdeploy::vision::OCRResult result;
|
||||||
|
if (!ppocr_v3.Predict(im, &result)) {
|
||||||
|
std::cerr << "Failed to predict." << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << result.Str() << std::endl;
|
||||||
|
|
||||||
|
auto vis_im = fastdeploy::vision::VisOcr(im, result);
|
||||||
|
cv::imwrite("vis_result.jpg", vis_im);
|
||||||
|
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
if (argc < 7) {
|
||||||
|
std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model "
|
||||||
|
"path/to/rec_model path/to/rec_label_file path/to/image "
|
||||||
|
"run_option, "
|
||||||
|
"e.g ./infer_demo ./ch_PP-OCRv3_det_infer "
|
||||||
|
"./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
|
||||||
|
"./ppocr_keys_v1.txt ./12.jpg 0"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
|
||||||
|
"with ascend."
|
||||||
|
<< std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fastdeploy::RuntimeOption option;
|
||||||
|
fastdeploy::ModelFormat format;
|
||||||
|
int flag = std::atoi(argv[6]);
|
||||||
|
|
||||||
|
if (flag == 0) {
|
||||||
|
option.UseCpu();
|
||||||
|
format = fastdeploy::ONNX;
|
||||||
|
} else if (flag == 1) {
|
||||||
|
option.UseRKNPU2();
|
||||||
|
format = fastdeploy::RKNN;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string det_model_dir = argv[1];
|
||||||
|
std::string cls_model_dir = argv[2];
|
||||||
|
std::string rec_model_dir = argv[3];
|
||||||
|
std::string rec_label_file = argv[4];
|
||||||
|
std::string test_image = argv[5];
|
||||||
|
InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file,
|
||||||
|
test_image, option, format);
|
||||||
|
return 0;
|
||||||
|
}
|
49
examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
Executable file
49
examples/vision/ocr/PP-OCRv3/rknpu2/python/README.md
Executable file
@@ -0,0 +1,49 @@
|
|||||||
|
English | [简体中文](README_CN.md)
|
||||||
|
# PPOCRv3 Python Deployment Example
|
||||||
|
|
||||||
|
Two steps before deployment
|
||||||
|
|
||||||
|
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
|
||||||
|
- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
|
||||||
|
|
||||||
|
This directory provides `infer_static_shape.py`, an example that quickly deploys PPOCRv3 on CPU and on RKNPU2. The script is as follows
|
||||||
|
|
||||||
|
```
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||||
|
|
||||||
|
# Download the example code for deployment
|
||||||
|
git clone https://github.com/PaddlePaddle/FastDeploy.git
|
||||||
|
cd FastDeploy/examples/vision/ocr/PP-OCRv3/rknpu2/python/
|
||||||
|
|
||||||
|
python3 infer_static_shape.py \
|
||||||
|
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
--rec_label_file ./ppocr_keys_v1.txt \
|
||||||
|
--image 12.jpg \
|
||||||
|
--device cpu
|
||||||
|
|
||||||
|
# NPU inference
|
||||||
|
python3 infer_static_shape.py \
|
||||||
|
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
|
||||||
|
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
|
||||||
|
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
|
||||||
|
--rec_label_file ppocr_keys_v1.txt \
|
||||||
|
--image 12.jpg \
|
||||||
|
--device npu
|
||||||
|
```
|
||||||
|
|
||||||
|
The visualized result after running is as follows
|
||||||
|
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Other Documents
|
||||||
|
|
||||||
|
- [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
|
||||||
|
- [PPOCR Model Description](../../)
|
||||||
|
- [PPOCRv3 C++ Deployment](../cpp)
|
||||||
|
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
|
||||||
|
- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)
|
62
examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
Normal file
62
examples/vision/ocr/PP-OCRv3/rknpu2/python/README_CN.md
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
[English](README.md) | 简体中文
|
||||||
|
# PPOCRv3 Python部署示例
|
||||||
|
|
||||||
|
在部署前,需确认以下两个步骤
|
||||||
|
|
||||||
|
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
|
||||||
|
- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
|
||||||
|
|
||||||
|
本目录下提供`infer_static_shape.py`快速完成PPOCRv3在CPU以及RKNPU2上部署的示例。执行如下脚本即可完成
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
# 下载模型,图片和字典文件
|
||||||
|
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
|
||||||
|
tar xvf ch_PP-OCRv3_det_infer.tar
|
||||||
|
|
||||||
|
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||||
|
tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||||
|
|
||||||
|
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
||||||
|
tar xvf ch_PP-OCRv3_rec_infer.tar
|
||||||
|
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||||
|
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||||
|
|
||||||
|
#下载部署示例代码
|
||||||
|
git clone https://github.com/PaddlePaddle/FastDeploy.git
|
||||||
|
cd FastDeploy/examples/vision/ocr/PP-OCRv3/rknpu2/python/
|
||||||
|
|
||||||
|
# CPU推理
|
||||||
|
python3 infer_static_shape.py \
|
||||||
|
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
|
||||||
|
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
|
||||||
|
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
|
||||||
|
--rec_label_file ./ppocr_keys_v1.txt \
|
||||||
|
--image 12.jpg \
|
||||||
|
--device cpu
|
||||||
|
|
||||||
|
# NPU推理
|
||||||
|
python3 infer_static_shape.py \
|
||||||
|
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
|
||||||
|
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
|
||||||
|
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
|
||||||
|
--rec_label_file ppocr_keys_v1.txt \
|
||||||
|
--image 12.jpg \
|
||||||
|
--device npu
|
||||||
|
```
|
||||||
|
|
||||||
|
运行完成可视化结果如下图所示
|
||||||
|
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## 其它文档
|
||||||
|
|
||||||
|
- [Python API文档查阅](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
|
||||||
|
- [PPOCR 系列模型介绍](../../)
|
||||||
|
- [PPOCRv3 C++部署](../cpp)
|
||||||
|
- [模型预测结果说明](../../../../../../docs/api/vision_results/)
|
||||||
|
- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md)
|
144
examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
Executable file
144
examples/vision/ocr/PP-OCRv3/rknpu2/python/infer_static_shape.py
Executable file
@@ -0,0 +1,144 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import fastdeploy as fd
|
||||||
|
import cv2
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments():
    """Parse the command-line arguments of the demo.

    Returns:
        argparse.Namespace with the attributes ``det_model``, ``cls_model``,
        ``rec_model``, ``rec_label_file``, ``image`` (all required),
        ``device`` (default ``'cpu'``) and ``cpu_thread_num`` (default ``9``).
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--det_model", required=True, help="Path of Detection model of PPOCR.")
    parser.add_argument(
        "--cls_model",
        required=True,
        help="Path of Classification model of PPOCR.")
    parser.add_argument(
        "--rec_model",
        required=True,
        help="Path of Recognization model of PPOCR.")
    parser.add_argument(
        "--rec_label_file",
        required=True,
        help="Path of Recognization label file of PPOCR.")
    parser.add_argument(
        "--image", type=str, required=True, help="Path of test image file.")
    # Only 'npu' is treated specially by this script; any other value keeps
    # the default runtime option and the ONNX model format.
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        help="Type of inference device, support 'cpu' or 'npu'.")
    parser.add_argument(
        "--cpu_thread_num",
        type=int,
        default=9,
        help="Number of threads while inference on CPU.")
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def build_option(args):
    """Create the runtime options for the det, cls and rec models.

    Three independent ``fd.RuntimeOption`` objects are created. When
    ``args.device`` is ``"npu"``, ``use_rknpu2()`` is called on each of them;
    otherwise they are returned as constructed.

    Returns:
        Tuple ``(det_option, cls_option, rec_option)``.
    """
    runtime_options = tuple(fd.RuntimeOption() for _ in range(3))

    use_npu = args.device == "npu"
    if use_npu:
        for runtime_option in runtime_options:
            runtime_option.use_rknpu2()

    return runtime_options
|
||||||
|
|
||||||
|
|
||||||
|
def build_format(args):
    """Pick the model format for the det, cls and rec models.

    All three models share one format: ``fd.ModelFormat.RKNN`` when
    ``args.device`` is ``"npu"``, ``fd.ModelFormat.ONNX`` otherwise.

    Returns:
        Tuple ``(det_format, cls_format, rec_format)``.
    """
    if args.device == "npu":
        chosen_format = fd.ModelFormat.RKNN
    else:
        chosen_format = fd.ModelFormat.ONNX

    return chosen_format, chosen_format, chosen_format
|
||||||
|
|
||||||
|
|
||||||
|
args = parse_arguments()

# Detection model: detects the text boxes.
det_model_file = args.det_model
det_params_file = ""
# Classification model: text direction classification, optional.
cls_model_file = args.cls_model
cls_params_file = ""
# Recognition model: recognizes the text.
rec_model_file = args.rec_model
rec_params_file = ""
rec_label_file = args.rec_label_file

det_option, cls_option, rec_option = build_option(args)
det_format, cls_format, rec_format = build_format(args)

det_model = fd.vision.ocr.DBDetector(
    det_model_file,
    det_params_file,
    runtime_option=det_option,
    model_format=det_format)

cls_model = fd.vision.ocr.Classifier(
    cls_model_file,
    cls_params_file,
    runtime_option=cls_option,
    model_format=cls_format)

rec_model = fd.vision.ocr.Recognizer(
    rec_model_file,
    rec_params_file,
    rec_label_file,
    runtime_option=rec_option,
    model_format=rec_format)

# Enable static shape inference for the Det and Rec models.
det_model.preprocessor.static_shape_infer = True
rec_model.preprocessor.static_shape_infer = True

if args.device == "npu":
    # On NPU the normalize and permute preprocessing steps are switched off
    # for all three models.
    det_model.preprocessor.disable_normalize()
    det_model.preprocessor.disable_permute()
    cls_model.preprocessor.disable_normalize()
    cls_model.preprocessor.disable_permute()
    rec_model.preprocessor.disable_normalize()
    rec_model.preprocessor.disable_permute()

# Create PP-OCR by chaining the 3 models. cls_model is optional and can be set
# to None when it is not needed.
ppocr_v3 = fd.vision.ocr.PPOCRv3(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)

# With static shape inference enabled, the batch size of the Cls and Rec
# models must be set to 1.
ppocr_v3.cls_batch_size = 1
ppocr_v3.rec_batch_size = 1

# Load the image to predict on.
im = cv2.imread(args.image)
# cv2.imread returns None when the file cannot be read; stop with a clear
# error instead of passing None to the pipeline.
if im is None:
    raise ValueError("Failed to read image: {}".format(args.image))

# Predict and print the result.
result = ppocr_v3.predict(im)

print(result)

# Visualize the result.
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")
|
35
fastdeploy/vision/ocr/ppocr/classifier.cc
Executable file → Normal file
35
fastdeploy/vision/ocr/ppocr/classifier.cc
Executable file → Normal file
@@ -13,6 +13,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
|
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
|
||||||
|
|
||||||
#include "fastdeploy/utils/perf.h"
|
#include "fastdeploy/utils/perf.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
@@ -26,15 +27,16 @@ Classifier::Classifier(const std::string& model_file,
|
|||||||
const RuntimeOption& custom_option,
|
const RuntimeOption& custom_option,
|
||||||
const ModelFormat& model_format) {
|
const ModelFormat& model_format) {
|
||||||
if (model_format == ModelFormat::ONNX) {
|
if (model_format == ModelFormat::ONNX) {
|
||||||
valid_cpu_backends = {Backend::ORT,
|
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
|
||||||
Backend::OPENVINO};
|
valid_gpu_backends = {Backend::ORT, Backend::TRT};
|
||||||
valid_gpu_backends = {Backend::ORT, Backend::TRT};
|
|
||||||
} else {
|
} else {
|
||||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
|
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
|
||||||
|
Backend::LITE};
|
||||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||||
valid_kunlunxin_backends = {Backend::LITE};
|
valid_kunlunxin_backends = {Backend::LITE};
|
||||||
valid_ascend_backends = {Backend::LITE};
|
valid_ascend_backends = {Backend::LITE};
|
||||||
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
|
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
|
||||||
|
valid_rknpu_backends = {Backend::RKNPU2};
|
||||||
}
|
}
|
||||||
runtime_option = custom_option;
|
runtime_option = custom_option;
|
||||||
runtime_option.model_format = model_format;
|
runtime_option.model_format = model_format;
|
||||||
@@ -54,16 +56,18 @@ bool Classifier::Initialize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Classifier> Classifier::Clone() const {
|
std::unique_ptr<Classifier> Classifier::Clone() const {
|
||||||
std::unique_ptr<Classifier> clone_model = utils::make_unique<Classifier>(Classifier(*this));
|
std::unique_ptr<Classifier> clone_model =
|
||||||
|
utils::make_unique<Classifier>(Classifier(*this));
|
||||||
clone_model->SetRuntime(clone_model->CloneRuntime());
|
clone_model->SetRuntime(clone_model->CloneRuntime());
|
||||||
return clone_model;
|
return clone_model;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score) {
|
bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label,
|
||||||
|
float* cls_score) {
|
||||||
std::vector<int32_t> cls_labels(1);
|
std::vector<int32_t> cls_labels(1);
|
||||||
std::vector<float> cls_scores(1);
|
std::vector<float> cls_scores(1);
|
||||||
bool success = BatchPredict({img}, &cls_labels, &cls_scores);
|
bool success = BatchPredict({img}, &cls_labels, &cls_scores);
|
||||||
if(!success){
|
if (!success) {
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
*cls_label = cls_labels[0];
|
*cls_label = cls_labels[0];
|
||||||
@@ -72,16 +76,19 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_scor
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
|
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
|
||||||
std::vector<int32_t>* cls_labels, std::vector<float>* cls_scores) {
|
std::vector<int32_t>* cls_labels,
|
||||||
|
std::vector<float>* cls_scores) {
|
||||||
return BatchPredict(images, cls_labels, cls_scores, 0, images.size());
|
return BatchPredict(images, cls_labels, cls_scores, 0, images.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
|
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
|
||||||
std::vector<int32_t>* cls_labels, std::vector<float>* cls_scores,
|
std::vector<int32_t>* cls_labels,
|
||||||
|
std::vector<float>* cls_scores,
|
||||||
size_t start_index, size_t end_index) {
|
size_t start_index, size_t end_index) {
|
||||||
size_t total_size = images.size();
|
size_t total_size = images.size();
|
||||||
std::vector<FDMat> fd_images = WrapMat(images);
|
std::vector<FDMat> fd_images = WrapMat(images);
|
||||||
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index)) {
|
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
|
||||||
|
end_index)) {
|
||||||
FDERROR << "Failed to preprocess the input image." << std::endl;
|
FDERROR << "Failed to preprocess the input image." << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -91,13 +98,15 @@ bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, start_index, total_size)) {
|
if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores,
|
||||||
FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl;
|
start_index, total_size)) {
|
||||||
|
FDERROR << "Failed to postprocess the inference cls_results by runtime."
|
||||||
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namesapce ocr
|
} // namespace ocr
|
||||||
} // namespace vision
|
} // namespace vision
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
30
fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
Executable file → Normal file
30
fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc
Executable file → Normal file
@@ -13,9 +13,10 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h"
|
#include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/function/concat.h"
|
||||||
#include "fastdeploy/utils/perf.h"
|
#include "fastdeploy/utils/perf.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
#include "fastdeploy/function/concat.h"
|
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
namespace vision {
|
namespace vision {
|
||||||
@@ -38,34 +39,43 @@ void OcrClassifierResizeImage(FDMat* mat,
|
|||||||
Resize::Run(mat, resize_w, img_h);
|
Resize::Run(mat, resize_w, img_h);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) {
|
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
|
||||||
|
std::vector<FDTensor>* outputs) {
|
||||||
return Run(images, outputs, 0, images->size());
|
return Run(images, outputs, 0, images->size());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
|
||||||
|
std::vector<FDTensor>* outputs,
|
||||||
size_t start_index, size_t end_index) {
|
size_t start_index, size_t end_index) {
|
||||||
|
if (images->size() == 0 || start_index < 0 || end_index <= start_index ||
|
||||||
if (images->size() == 0 || start_index <0 || end_index <= start_index || end_index > images->size()) {
|
end_index > images->size()) {
|
||||||
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl;
|
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
|
||||||
|
"end_index <= images->size()"
|
||||||
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = start_index; i < end_index; ++i) {
|
for (size_t i = start_index; i < end_index; ++i) {
|
||||||
FDMat* mat = &(images->at(i));
|
FDMat* mat = &(images->at(i));
|
||||||
OcrClassifierResizeImage(mat, cls_image_shape_);
|
OcrClassifierResizeImage(mat, cls_image_shape_);
|
||||||
Normalize::Run(mat, mean_, scale_, is_scale_);
|
if (!disable_normalize_) {
|
||||||
|
Normalize::Run(mat, mean_, scale_, is_scale_);
|
||||||
|
}
|
||||||
std::vector<float> value = {0, 0, 0};
|
std::vector<float> value = {0, 0, 0};
|
||||||
if (mat->Width() < cls_image_shape_[2]) {
|
if (mat->Width() < cls_image_shape_[2]) {
|
||||||
Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
|
Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
|
||||||
}
|
}
|
||||||
HWC2CHW::Run(mat);
|
|
||||||
Cast::Run(mat, "float");
|
if (!disable_permute_) {
|
||||||
|
HWC2CHW::Run(mat);
|
||||||
|
Cast::Run(mat, "float");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Only have 1 output Tensor.
|
// Only have 1 output Tensor.
|
||||||
outputs->resize(1);
|
outputs->resize(1);
|
||||||
// Concat all the preprocessed data to a batch tensor
|
// Concat all the preprocessed data to a batch tensor
|
||||||
size_t tensor_size = end_index - start_index;
|
size_t tensor_size = end_index - start_index;
|
||||||
std::vector<FDTensor> tensors(tensor_size);
|
std::vector<FDTensor> tensors(tensor_size);
|
||||||
for (size_t i = 0; i < tensor_size; ++i) {
|
for (size_t i = 0; i < tensor_size; ++i) {
|
||||||
(*images)[i + start_index].ShareWithTensor(&(tensors[i]));
|
(*images)[i + start_index].ShareWithTensor(&(tensors[i]));
|
||||||
tensors[i].ExpandDim(0);
|
tensors[i].ExpandDim(0);
|
||||||
|
@@ -56,7 +56,16 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
|
|||||||
/// Get cls_image_shape for the classification preprocess
|
/// Get cls_image_shape for the classification preprocess
|
||||||
std::vector<int> GetClsImageShape() const { return cls_image_shape_; }
|
std::vector<int> GetClsImageShape() const { return cls_image_shape_; }
|
||||||
|
|
||||||
|
/// Disable the normalize step (Normalize::Run) of preprocessing.
|
||||||
|
void DisableNormalize() { disable_normalize_ = true; }
|
||||||
|
/// Disable the hwc2chw (permute) step of preprocessing.
|
||||||
|
void DisablePermute() { disable_permute_ = true; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// for recording the switch of hwc2chw
|
||||||
|
bool disable_permute_ = false;
|
||||||
|
// for recording the switch of normalize
|
||||||
|
bool disable_normalize_ = false;
|
||||||
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
|
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
|
||||||
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
|
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
|
||||||
bool is_scale_ = true;
|
bool is_scale_ = true;
|
||||||
|
@@ -36,6 +36,7 @@ DBDetector::DBDetector(const std::string& model_file,
|
|||||||
valid_kunlunxin_backends = {Backend::LITE};
|
valid_kunlunxin_backends = {Backend::LITE};
|
||||||
valid_ascend_backends = {Backend::LITE};
|
valid_ascend_backends = {Backend::LITE};
|
||||||
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
|
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
|
||||||
|
valid_rknpu_backends = {Backend::RKNPU2};
|
||||||
}
|
}
|
||||||
|
|
||||||
runtime_option = custom_option;
|
runtime_option = custom_option;
|
||||||
|
@@ -20,9 +20,13 @@ namespace fastdeploy {
|
|||||||
namespace vision {
|
namespace vision {
|
||||||
namespace ocr {
|
namespace ocr {
|
||||||
|
|
||||||
std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len) {
|
std::array<int, 4> DBDetectorPreprocessor::OcrDetectorGetInfo(
|
||||||
|
FDMat* img, int max_size_len) {
|
||||||
int w = img->Width();
|
int w = img->Width();
|
||||||
int h = img->Height();
|
int h = img->Height();
|
||||||
|
if (static_shape_infer_) {
|
||||||
|
return {w, h, det_image_shape_[2], det_image_shape_[1]};
|
||||||
|
}
|
||||||
|
|
||||||
float ratio = 1.f;
|
float ratio = 1.f;
|
||||||
int max_wh = w >= h ? w : h;
|
int max_wh = w >= h ? w : h;
|
||||||
@@ -86,7 +90,10 @@ bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch,
|
|||||||
ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
|
ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
|
||||||
max_resize_w, max_resize_h);
|
max_resize_w, max_resize_h);
|
||||||
}
|
}
|
||||||
(*normalize_permute_op_)(image_batch);
|
|
||||||
|
if (!disable_normalize_ && !disable_permute_) {
|
||||||
|
(*normalize_permute_op_)(image_batch);
|
||||||
|
}
|
||||||
|
|
||||||
outputs->resize(1);
|
outputs->resize(1);
|
||||||
FDTensor* tensor = image_batch->Tensor();
|
FDTensor* tensor = image_batch->Tensor();
|
||||||
|
@@ -59,14 +59,44 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager {
|
|||||||
return &batch_det_img_info_;
|
return &batch_det_img_info_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Disable the normalize step of preprocessing.
|
||||||
|
void DisableNormalize() { disable_normalize_ = true; }
|
||||||
|
/// Disable the hwc2chw (permute) step of preprocessing.
|
||||||
|
void DisablePermute() { disable_permute_ = true; }
|
||||||
|
|
||||||
|
/// Set det_image_shape for the detection preprocess.
|
||||||
|
/// This api is usually used when you retrain the model.
|
||||||
|
/// Generally, you do not need to use it.
|
||||||
|
void SetDetImageShape(const std::vector<int>& det_image_shape) {
|
||||||
|
det_image_shape_ = det_image_shape;
|
||||||
|
}
|
||||||
|
/// Get det_image_shape for the detection preprocess
|
||||||
|
std::vector<int> GetDetImageShape() const { return det_image_shape_; }
|
||||||
|
|
||||||
|
/// Set static_shape_infer is true or not. When deploy PP-OCR
|
||||||
|
/// on hardware which can not support dynamic input shape very well,
|
||||||
|
/// like Huawei Ascend, static_shape_infer needs to be true.
|
||||||
|
void SetStaticShapeInfer(bool static_shape_infer) {
|
||||||
|
static_shape_infer_ = static_shape_infer;
|
||||||
|
}
|
||||||
|
/// Get static_shape_infer of the detection preprocess
|
||||||
|
bool GetStaticShapeInfer() const { return static_shape_infer_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w,
|
bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w,
|
||||||
int max_resize_h);
|
int max_resize_h);
|
||||||
|
// for recording the switch of hwc2chw
|
||||||
|
bool disable_permute_ = false;
|
||||||
|
// for recording the switch of normalize
|
||||||
|
bool disable_normalize_ = false;
|
||||||
int max_side_len_ = 960;
|
int max_side_len_ = 960;
|
||||||
std::vector<std::array<int, 4>> batch_det_img_info_;
|
std::vector<std::array<int, 4>> batch_det_img_info_;
|
||||||
std::shared_ptr<Resize> resize_op_;
|
std::shared_ptr<Resize> resize_op_;
|
||||||
std::shared_ptr<Pad> pad_op_;
|
std::shared_ptr<Pad> pad_op_;
|
||||||
std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
|
std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
|
||||||
|
std::vector<int> det_image_shape_ = {3, 960, 960};
|
||||||
|
bool static_shape_infer_ = false;
|
||||||
|
std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace ocr
|
} // namespace ocr
|
||||||
|
@@ -26,6 +26,9 @@ void BindPPOCRModel(pybind11::module& m) {
|
|||||||
pybind11::class_<vision::ocr::DBDetectorPreprocessor>(
|
pybind11::class_<vision::ocr::DBDetectorPreprocessor>(
|
||||||
m, "DBDetectorPreprocessor")
|
m, "DBDetectorPreprocessor")
|
||||||
.def(pybind11::init<>())
|
.def(pybind11::init<>())
|
||||||
|
.def_property("static_shape_infer",
|
||||||
|
&vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer,
|
||||||
|
&vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer)
|
||||||
.def_property("max_side_len",
|
.def_property("max_side_len",
|
||||||
&vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
|
&vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
|
||||||
&vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
|
&vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
|
||||||
@@ -33,19 +36,27 @@ void BindPPOCRModel(pybind11::module& m) {
|
|||||||
[](vision::ocr::DBDetectorPreprocessor& self,
|
[](vision::ocr::DBDetectorPreprocessor& self,
|
||||||
const std::vector<float>& mean, const std::vector<float>& std,
|
const std::vector<float>& mean, const std::vector<float>& std,
|
||||||
bool is_scale) { self.SetNormalize(mean, std, is_scale); })
|
bool is_scale) { self.SetNormalize(mean, std, is_scale); })
|
||||||
.def("run", [](vision::ocr::DBDetectorPreprocessor& self,
|
.def("run",
|
||||||
std::vector<pybind11::array>& im_list) {
|
[](vision::ocr::DBDetectorPreprocessor& self,
|
||||||
std::vector<vision::FDMat> images;
|
std::vector<pybind11::array>& im_list) {
|
||||||
for (size_t i = 0; i < im_list.size(); ++i) {
|
std::vector<vision::FDMat> images;
|
||||||
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
for (size_t i = 0; i < im_list.size(); ++i) {
|
||||||
}
|
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
||||||
std::vector<FDTensor> outputs;
|
}
|
||||||
self.Run(&images, &outputs);
|
std::vector<FDTensor> outputs;
|
||||||
auto batch_det_img_info = self.GetBatchImgInfo();
|
self.Run(&images, &outputs);
|
||||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
auto batch_det_img_info = self.GetBatchImgInfo();
|
||||||
outputs[i].StopSharing();
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||||
}
|
outputs[i].StopSharing();
|
||||||
return std::make_pair(outputs, *batch_det_img_info);
|
}
|
||||||
|
return std::make_pair(outputs, *batch_det_img_info);
|
||||||
|
})
|
||||||
|
.def("disable_normalize",
|
||||||
|
[](vision::ocr::DBDetectorPreprocessor& self) {
|
||||||
|
self.DisableNormalize();
|
||||||
|
})
|
||||||
|
.def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) {
|
||||||
|
self.DisablePermute();
|
||||||
});
|
});
|
||||||
|
|
||||||
pybind11::class_<vision::ocr::DBDetectorPostprocessor>(
|
pybind11::class_<vision::ocr::DBDetectorPostprocessor>(
|
||||||
@@ -135,21 +146,30 @@ void BindPPOCRModel(pybind11::module& m) {
|
|||||||
.def_property("is_scale",
|
.def_property("is_scale",
|
||||||
&vision::ocr::ClassifierPreprocessor::GetIsScale,
|
&vision::ocr::ClassifierPreprocessor::GetIsScale,
|
||||||
&vision::ocr::ClassifierPreprocessor::SetIsScale)
|
&vision::ocr::ClassifierPreprocessor::SetIsScale)
|
||||||
.def("run", [](vision::ocr::ClassifierPreprocessor& self,
|
.def("run",
|
||||||
std::vector<pybind11::array>& im_list) {
|
[](vision::ocr::ClassifierPreprocessor& self,
|
||||||
std::vector<vision::FDMat> images;
|
std::vector<pybind11::array>& im_list) {
|
||||||
for (size_t i = 0; i < im_list.size(); ++i) {
|
std::vector<vision::FDMat> images;
|
||||||
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
for (size_t i = 0; i < im_list.size(); ++i) {
|
||||||
}
|
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
||||||
std::vector<FDTensor> outputs;
|
}
|
||||||
if (!self.Run(&images, &outputs)) {
|
std::vector<FDTensor> outputs;
|
||||||
throw std::runtime_error(
|
if (!self.Run(&images, &outputs)) {
|
||||||
"Failed to preprocess the input data in ClassifierPreprocessor.");
|
throw std::runtime_error(
|
||||||
}
|
"Failed to preprocess the input data in "
|
||||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
"ClassifierPreprocessor.");
|
||||||
outputs[i].StopSharing();
|
}
|
||||||
}
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||||
return outputs;
|
outputs[i].StopSharing();
|
||||||
|
}
|
||||||
|
return outputs;
|
||||||
|
})
|
||||||
|
.def("disable_normalize",
|
||||||
|
[](vision::ocr::ClassifierPreprocessor& self) {
|
||||||
|
self.DisableNormalize();
|
||||||
|
})
|
||||||
|
.def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) {
|
||||||
|
self.DisablePermute();
|
||||||
});
|
});
|
||||||
|
|
||||||
pybind11::class_<vision::ocr::ClassifierPostprocessor>(
|
pybind11::class_<vision::ocr::ClassifierPostprocessor>(
|
||||||
@@ -229,21 +249,30 @@ void BindPPOCRModel(pybind11::module& m) {
|
|||||||
.def_property("is_scale",
|
.def_property("is_scale",
|
||||||
&vision::ocr::RecognizerPreprocessor::GetIsScale,
|
&vision::ocr::RecognizerPreprocessor::GetIsScale,
|
||||||
&vision::ocr::RecognizerPreprocessor::SetIsScale)
|
&vision::ocr::RecognizerPreprocessor::SetIsScale)
|
||||||
.def("run", [](vision::ocr::RecognizerPreprocessor& self,
|
.def("run",
|
||||||
std::vector<pybind11::array>& im_list) {
|
[](vision::ocr::RecognizerPreprocessor& self,
|
||||||
std::vector<vision::FDMat> images;
|
std::vector<pybind11::array>& im_list) {
|
||||||
for (size_t i = 0; i < im_list.size(); ++i) {
|
std::vector<vision::FDMat> images;
|
||||||
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
for (size_t i = 0; i < im_list.size(); ++i) {
|
||||||
}
|
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
||||||
std::vector<FDTensor> outputs;
|
}
|
||||||
if (!self.Run(&images, &outputs)) {
|
std::vector<FDTensor> outputs;
|
||||||
throw std::runtime_error(
|
if (!self.Run(&images, &outputs)) {
|
||||||
"Failed to preprocess the input data in RecognizerPreprocessor.");
|
throw std::runtime_error(
|
||||||
}
|
"Failed to preprocess the input data in "
|
||||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
"RecognizerPreprocessor.");
|
||||||
outputs[i].StopSharing();
|
}
|
||||||
}
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||||
return outputs;
|
outputs[i].StopSharing();
|
||||||
|
}
|
||||||
|
return outputs;
|
||||||
|
})
|
||||||
|
.def("disable_normalize",
|
||||||
|
[](vision::ocr::RecognizerPreprocessor& self) {
|
||||||
|
self.DisableNormalize();
|
||||||
|
})
|
||||||
|
.def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) {
|
||||||
|
self.DisablePermute();
|
||||||
});
|
});
|
||||||
|
|
||||||
pybind11::class_<vision::ocr::RecognizerPostprocessor>(
|
pybind11::class_<vision::ocr::RecognizerPostprocessor>(
|
||||||
|
@@ -13,22 +13,23 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h"
|
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/function/concat.h"
|
||||||
#include "fastdeploy/utils/perf.h"
|
#include "fastdeploy/utils/perf.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
#include "fastdeploy/function/concat.h"
|
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
namespace vision {
|
namespace vision {
|
||||||
namespace ocr {
|
namespace ocr {
|
||||||
|
|
||||||
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
|
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
|
||||||
const std::vector<int>& rec_image_shape, bool static_shape_infer) {
|
const std::vector<int>& rec_image_shape,
|
||||||
|
bool static_shape_infer) {
|
||||||
int img_h, img_w;
|
int img_h, img_w;
|
||||||
img_h = rec_image_shape[1];
|
img_h = rec_image_shape[1];
|
||||||
img_w = rec_image_shape[2];
|
img_w = rec_image_shape[2];
|
||||||
|
|
||||||
if (!static_shape_infer) {
|
if (!static_shape_infer) {
|
||||||
|
|
||||||
img_w = int(img_h * max_wh_ratio);
|
img_w = int(img_h * max_wh_ratio);
|
||||||
float ratio = float(mat->Width()) / float(mat->Height());
|
float ratio = float(mat->Width()) / float(mat->Height());
|
||||||
|
|
||||||
@@ -43,23 +44,29 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
|
|||||||
|
|
||||||
} else {
|
} else {
|
||||||
if (mat->Width() >= img_w) {
|
if (mat->Width() >= img_w) {
|
||||||
Resize::Run(mat, img_w, img_h); // Resize W to 320
|
Resize::Run(mat, img_w, img_h); // Resize W to 320
|
||||||
} else {
|
} else {
|
||||||
Resize::Run(mat, mat->Width(), img_h);
|
Resize::Run(mat, mat->Width(), img_h);
|
||||||
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
|
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
|
||||||
// Pad to 320
|
// Pad to 320
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) {
|
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images,
|
||||||
|
std::vector<FDTensor>* outputs) {
|
||||||
return Run(images, outputs, 0, images->size(), {});
|
return Run(images, outputs, 0, images->size(), {});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images,
|
||||||
size_t start_index, size_t end_index, const std::vector<int>& indices) {
|
std::vector<FDTensor>* outputs,
|
||||||
if (images->size() == 0 || end_index <= start_index || end_index > images->size()) {
|
size_t start_index, size_t end_index,
|
||||||
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl;
|
const std::vector<int>& indices) {
|
||||||
|
if (images->size() == 0 || end_index <= start_index ||
|
||||||
|
end_index > images->size()) {
|
||||||
|
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
|
||||||
|
"end_index <= images->size()"
|
||||||
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,7 +74,7 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
|
|||||||
int img_w = rec_image_shape_[2];
|
int img_w = rec_image_shape_[2];
|
||||||
float max_wh_ratio = img_w * 1.0 / img_h;
|
float max_wh_ratio = img_w * 1.0 / img_h;
|
||||||
float ori_wh_ratio;
|
float ori_wh_ratio;
|
||||||
|
|
||||||
for (size_t i = start_index; i < end_index; ++i) {
|
for (size_t i = start_index; i < end_index; ++i) {
|
||||||
size_t real_index = i;
|
size_t real_index = i;
|
||||||
if (indices.size() != 0) {
|
if (indices.size() != 0) {
|
||||||
@@ -84,20 +91,31 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
|
|||||||
real_index = indices[i];
|
real_index = indices[i];
|
||||||
}
|
}
|
||||||
FDMat* mat = &(images->at(real_index));
|
FDMat* mat = &(images->at(real_index));
|
||||||
OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_infer_);
|
OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_,
|
||||||
NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
|
static_shape_infer_);
|
||||||
|
if (!disable_normalize_ && !disable_permute_) {
|
||||||
|
NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
|
||||||
|
} else {
|
||||||
|
if (!disable_normalize_) {
|
||||||
|
Normalize::Run(mat, mean_, scale_, is_scale_);
|
||||||
|
}
|
||||||
|
if (!disable_permute_) {
|
||||||
|
HWC2CHW::Run(mat);
|
||||||
|
Cast::Run(mat, "float");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Only have 1 output Tensor.
|
// Only have 1 output Tensor.
|
||||||
outputs->resize(1);
|
outputs->resize(1);
|
||||||
size_t tensor_size = end_index-start_index;
|
size_t tensor_size = end_index - start_index;
|
||||||
// Concat all the preprocessed data to a batch tensor
|
// Concat all the preprocessed data to a batch tensor
|
||||||
std::vector<FDTensor> tensors(tensor_size);
|
std::vector<FDTensor> tensors(tensor_size);
|
||||||
for (size_t i = 0; i < tensor_size; ++i) {
|
for (size_t i = 0; i < tensor_size; ++i) {
|
||||||
size_t real_index = i + start_index;
|
size_t real_index = i + start_index;
|
||||||
if (indices.size() != 0) {
|
if (indices.size() != 0) {
|
||||||
real_index = indices[i + start_index];
|
real_index = indices[i + start_index];
|
||||||
}
|
}
|
||||||
|
|
||||||
(*images)[real_index].ShareWithTensor(&(tensors[i]));
|
(*images)[real_index].ShareWithTensor(&(tensors[i]));
|
||||||
tensors[i].ExpandDim(0);
|
tensors[i].ExpandDim(0);
|
||||||
}
|
}
|
||||||
|
@@ -66,7 +66,16 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
|
|||||||
/// Get rec_image_shape for the recognition preprocess
|
/// Get rec_image_shape for the recognition preprocess
|
||||||
std::vector<int> GetRecImageShape() { return rec_image_shape_; }
|
std::vector<int> GetRecImageShape() { return rec_image_shape_; }
|
||||||
|
|
||||||
|
/// Disable the normalize step of preprocessing.
|
||||||
|
void DisableNormalize() { disable_normalize_ = true; }
|
||||||
|
/// Disable the hwc2chw (permute) step of preprocessing.
|
||||||
|
void DisablePermute() { disable_permute_ = true; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// for recording the switch of hwc2chw
|
||||||
|
bool disable_permute_ = false;
|
||||||
|
// for recording the switch of normalize
|
||||||
|
bool disable_normalize_ = false;
|
||||||
std::vector<int> rec_image_shape_ = {3, 48, 320};
|
std::vector<int> rec_image_shape_ = {3, 48, 320};
|
||||||
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
|
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
|
||||||
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
|
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
|
||||||
|
41
fastdeploy/vision/ocr/ppocr/recognizer.cc
Executable file → Normal file
41
fastdeploy/vision/ocr/ppocr/recognizer.cc
Executable file → Normal file
@@ -13,6 +13,7 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
||||||
|
|
||||||
#include "fastdeploy/utils/perf.h"
|
#include "fastdeploy/utils/perf.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
@@ -26,17 +27,19 @@ Recognizer::Recognizer(const std::string& model_file,
|
|||||||
const std::string& params_file,
|
const std::string& params_file,
|
||||||
const std::string& label_path,
|
const std::string& label_path,
|
||||||
const RuntimeOption& custom_option,
|
const RuntimeOption& custom_option,
|
||||||
const ModelFormat& model_format):postprocessor_(label_path) {
|
const ModelFormat& model_format)
|
||||||
|
: postprocessor_(label_path) {
|
||||||
if (model_format == ModelFormat::ONNX) {
|
if (model_format == ModelFormat::ONNX) {
|
||||||
valid_cpu_backends = {Backend::ORT,
|
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
|
||||||
Backend::OPENVINO};
|
valid_gpu_backends = {Backend::ORT, Backend::TRT};
|
||||||
valid_gpu_backends = {Backend::ORT, Backend::TRT};
|
|
||||||
} else {
|
} else {
|
||||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
|
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
|
||||||
|
Backend::LITE};
|
||||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||||
valid_kunlunxin_backends = {Backend::LITE};
|
valid_kunlunxin_backends = {Backend::LITE};
|
||||||
valid_ascend_backends = {Backend::LITE};
|
valid_ascend_backends = {Backend::LITE};
|
||||||
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
|
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
|
||||||
|
valid_rknpu_backends = {Backend::RKNPU2};
|
||||||
}
|
}
|
||||||
|
|
||||||
runtime_option = custom_option;
|
runtime_option = custom_option;
|
||||||
@@ -57,12 +60,14 @@ bool Recognizer::Initialize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Recognizer> Recognizer::Clone() const {
|
std::unique_ptr<Recognizer> Recognizer::Clone() const {
|
||||||
std::unique_ptr<Recognizer> clone_model = utils::make_unique<Recognizer>(Recognizer(*this));
|
std::unique_ptr<Recognizer> clone_model =
|
||||||
|
utils::make_unique<Recognizer>(Recognizer(*this));
|
||||||
clone_model->SetRuntime(clone_model->CloneRuntime());
|
clone_model->SetRuntime(clone_model->CloneRuntime());
|
||||||
return clone_model;
|
return clone_model;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score) {
|
bool Recognizer::Predict(const cv::Mat& img, std::string* text,
|
||||||
|
float* rec_score) {
|
||||||
std::vector<std::string> texts(1);
|
std::vector<std::string> texts(1);
|
||||||
std::vector<float> rec_scores(1);
|
std::vector<float> rec_scores(1);
|
||||||
bool success = BatchPredict({img}, &texts, &rec_scores);
|
bool success = BatchPredict({img}, &texts, &rec_scores);
|
||||||
@@ -75,20 +80,24 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
|
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
|
||||||
std::vector<std::string>* texts, std::vector<float>* rec_scores) {
|
std::vector<std::string>* texts,
|
||||||
|
std::vector<float>* rec_scores) {
|
||||||
return BatchPredict(images, texts, rec_scores, 0, images.size(), {});
|
return BatchPredict(images, texts, rec_scores, 0, images.size(), {});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
|
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
|
||||||
std::vector<std::string>* texts, std::vector<float>* rec_scores,
|
std::vector<std::string>* texts,
|
||||||
size_t start_index, size_t end_index, const std::vector<int>& indices) {
|
std::vector<float>* rec_scores,
|
||||||
|
size_t start_index, size_t end_index,
|
||||||
|
const std::vector<int>& indices) {
|
||||||
size_t total_size = images.size();
|
size_t total_size = images.size();
|
||||||
if (indices.size() != 0 && indices.size() != total_size) {
|
if (indices.size() != 0 && indices.size() != total_size) {
|
||||||
FDERROR << "indices.size() should be 0 or images.size()." << std::endl;
|
FDERROR << "indices.size() should be 0 or images.size()." << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::vector<FDMat> fd_images = WrapMat(images);
|
std::vector<FDMat> fd_images = WrapMat(images);
|
||||||
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index, indices)) {
|
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
|
||||||
|
end_index, indices)) {
|
||||||
FDERROR << "Failed to preprocess the input image." << std::endl;
|
FDERROR << "Failed to preprocess the input image." << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -99,13 +108,15 @@ bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, start_index, total_size, indices)) {
|
if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores,
|
||||||
FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl;
|
start_index, total_size, indices)) {
|
||||||
|
FDERROR << "Failed to postprocess the inference cls_results by runtime."
|
||||||
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namesapce ocr
|
} // namespace ocr
|
||||||
} // namespace vision
|
} // namespace vision
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
@@ -17,10 +17,14 @@
|
|||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
namespace vision {
|
namespace vision {
|
||||||
|
|
||||||
cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
|
cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
|
||||||
|
const float score_threshold) {
|
||||||
auto vis_im = im.clone();
|
auto vis_im = im.clone();
|
||||||
|
|
||||||
for (int n = 0; n < ocr_result.boxes.size(); n++) {
|
for (int n = 0; n < ocr_result.boxes.size(); n++) {
|
||||||
|
if (ocr_result.rec_scores[n] < score_threshold) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
cv::Point rook_points[4];
|
cv::Point rook_points[4];
|
||||||
|
|
||||||
for (int m = 0; m < 4; m++) {
|
for (int m = 0; m < 4; m++) {
|
||||||
@@ -28,7 +32,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
|
|||||||
int(ocr_result.boxes[n][m * 2 + 1]));
|
int(ocr_result.boxes[n][m * 2 + 1]));
|
||||||
}
|
}
|
||||||
|
|
||||||
const cv::Point *ppt[1] = {rook_points};
|
const cv::Point* ppt[1] = {rook_points};
|
||||||
int npt[] = {4};
|
int npt[] = {4};
|
||||||
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
|
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
|
||||||
}
|
}
|
||||||
@@ -36,7 +40,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
|
|||||||
return vis_im;
|
return vis_im;
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
|
cv::Mat Visualize::VisOcr(const cv::Mat& im, const OCRResult& ocr_result) {
|
||||||
FDWARNING
|
FDWARNING
|
||||||
<< "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, "
|
<< "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, "
|
||||||
"please use fastdeploy::vision:VisOcr function instead."
|
"please use fastdeploy::vision:VisOcr function instead."
|
||||||
@@ -51,7 +55,7 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
|
|||||||
int(ocr_result.boxes[n][m * 2 + 1]));
|
int(ocr_result.boxes[n][m * 2 + 1]));
|
||||||
}
|
}
|
||||||
|
|
||||||
const cv::Point *ppt[1] = {rook_points};
|
const cv::Point* ppt[1] = {rook_points};
|
||||||
int npt[] = {4};
|
int npt[] = {4};
|
||||||
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
|
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
|
||||||
}
|
}
|
||||||
|
@@ -15,8 +15,8 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "fastdeploy/vision/common/result.h"
|
#include "fastdeploy/vision/common/result.h"
|
||||||
#include "opencv2/imgproc/imgproc.hpp"
|
|
||||||
#include "fastdeploy/vision/tracking/pptracking/model.h"
|
#include "fastdeploy/vision/tracking/pptracking/model.h"
|
||||||
|
#include "opencv2/imgproc/imgproc.hpp"
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
/** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace
|
/** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace
|
||||||
@@ -41,9 +41,10 @@ class FASTDEPLOY_DECL Visualize {
|
|||||||
bool remove_small_connected_area = false);
|
bool remove_small_connected_area = false);
|
||||||
static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
|
static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
|
||||||
float threshold);
|
float threshold);
|
||||||
static cv::Mat SwapBackgroundMatting(
|
static cv::Mat
|
||||||
const cv::Mat& im, const cv::Mat& background, const MattingResult& result,
|
SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background,
|
||||||
bool remove_small_connected_area = false);
|
const MattingResult& result,
|
||||||
|
bool remove_small_connected_area = false);
|
||||||
static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im,
|
static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im,
|
||||||
const cv::Mat& background,
|
const cv::Mat& background,
|
||||||
int background_label,
|
int background_label,
|
||||||
@@ -90,9 +91,11 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
|
|||||||
* \param[in] font_size font size
|
* \param[in] font_size font size
|
||||||
* \return cv::Mat type stores the visualized results
|
* \return cv::Mat type stores the visualized results
|
||||||
*/
|
*/
|
||||||
FASTDEPLOY_DECL cv::Mat VisClassification(
|
FASTDEPLOY_DECL cv::Mat VisClassification(const cv::Mat& im,
|
||||||
const cv::Mat& im, const ClassifyResult& result, int top_k = 5,
|
const ClassifyResult& result,
|
||||||
float score_threshold = 0.0f, float font_size = 0.5f);
|
int top_k = 5,
|
||||||
|
float score_threshold = 0.0f,
|
||||||
|
float font_size = 0.5f);
|
||||||
/** \brief Show the visualized results with custom labels for classification models
|
/** \brief Show the visualized results with custom labels for classification models
|
||||||
*
|
*
|
||||||
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
||||||
@@ -103,10 +106,10 @@ FASTDEPLOY_DECL cv::Mat VisClassification(
|
|||||||
* \param[in] font_size font size
|
* \param[in] font_size font size
|
||||||
* \return cv::Mat type stores the visualized results
|
* \return cv::Mat type stores the visualized results
|
||||||
*/
|
*/
|
||||||
FASTDEPLOY_DECL cv::Mat VisClassification(
|
FASTDEPLOY_DECL cv::Mat
|
||||||
const cv::Mat& im, const ClassifyResult& result,
|
VisClassification(const cv::Mat& im, const ClassifyResult& result,
|
||||||
const std::vector<std::string>& labels, int top_k = 5,
|
const std::vector<std::string>& labels, int top_k = 5,
|
||||||
float score_threshold = 0.0f, float font_size = 0.5f);
|
float score_threshold = 0.0f, float font_size = 0.5f);
|
||||||
/** \brief Show the visualized results for face detection models
|
/** \brief Show the visualized results for face detection models
|
||||||
*
|
*
|
||||||
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
||||||
@@ -159,7 +162,8 @@ FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im,
|
|||||||
* \param[in] result the result produced by model
|
* \param[in] result the result produced by model
|
||||||
* \return cv::Mat type stores the visualized results
|
* \return cv::Mat type stores the visualized results
|
||||||
*/
|
*/
|
||||||
FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result);
|
FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
|
||||||
|
const float score_threshold = 0);
|
||||||
|
|
||||||
FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
|
FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
|
||||||
float score_threshold = 0.0f,
|
float score_threshold = 0.0f,
|
||||||
@@ -172,10 +176,10 @@ FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
|
|||||||
* \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas
|
* \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas
|
||||||
* \return cv::Mat type stores the visualized results
|
* \return cv::Mat type stores the visualized results
|
||||||
*/
|
*/
|
||||||
FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
|
FASTDEPLOY_DECL cv::Mat
|
||||||
const cv::Mat& background,
|
SwapBackground(const cv::Mat& im, const cv::Mat& background,
|
||||||
const MattingResult& result,
|
const MattingResult& result,
|
||||||
bool remove_small_connected_area = false);
|
bool remove_small_connected_area = false);
|
||||||
/** \brief Swap the image background with SegmentationResult
|
/** \brief Swap the image background with SegmentationResult
|
||||||
*
|
*
|
||||||
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
|
||||||
@@ -196,12 +200,11 @@ FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
|
|||||||
* \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold
|
* \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold
|
||||||
* \return cv::Mat type stores the visualized results
|
* \return cv::Mat type stores the visualized results
|
||||||
*/
|
*/
|
||||||
FASTDEPLOY_DECL cv::Mat VisKeypointDetection(const cv::Mat& im,
|
FASTDEPLOY_DECL cv::Mat
|
||||||
const KeyPointDetectionResult& results,
|
VisKeypointDetection(const cv::Mat& im, const KeyPointDetectionResult& results,
|
||||||
float conf_threshold = 0.5f);
|
float conf_threshold = 0.5f);
|
||||||
FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
|
FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
|
||||||
const HeadPoseResult& result,
|
const HeadPoseResult& result, int size = 50,
|
||||||
int size = 50,
|
|
||||||
int line_size = 1);
|
int line_size = 1);
|
||||||
|
|
||||||
} // namespace vision
|
} // namespace vision
|
||||||
|
@@ -65,6 +65,29 @@ class DBDetectorPreprocessor:
|
|||||||
"""
|
"""
|
||||||
self._preprocessor.set_normalize(mean, std, is_scale)
|
self._preprocessor.set_normalize(mean, std, is_scale)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def static_shape_infer(self):
|
||||||
|
return self._preprocessor.static_shape_infer
|
||||||
|
|
||||||
|
@static_shape_infer.setter
|
||||||
|
def static_shape_infer(self, value):
|
||||||
|
assert isinstance(
|
||||||
|
value,
|
||||||
|
bool), "The value to set `static_shape_infer` must be type of bool."
|
||||||
|
self._preprocessor.static_shape_infer = value
|
||||||
|
|
||||||
|
def disable_normalize(self):
|
||||||
|
"""
|
||||||
|
This function will disable normalize in preprocessing step.
|
||||||
|
"""
|
||||||
|
self._preprocessor.disable_normalize()
|
||||||
|
|
||||||
|
def disable_permute(self):
|
||||||
|
"""
|
||||||
|
This function will disable hwc2chw in preprocessing step.
|
||||||
|
"""
|
||||||
|
self._preprocessor.disable_permute()
|
||||||
|
|
||||||
|
|
||||||
class DBDetectorPostprocessor:
|
class DBDetectorPostprocessor:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -358,6 +381,18 @@ class ClassifierPreprocessor:
|
|||||||
list), "The value to set `cls_image_shape` must be type of list."
|
list), "The value to set `cls_image_shape` must be type of list."
|
||||||
self._preprocessor.cls_image_shape = value
|
self._preprocessor.cls_image_shape = value
|
||||||
|
|
||||||
|
def disable_normalize(self):
|
||||||
|
"""
|
||||||
|
This function will disable normalize in preprocessing step.
|
||||||
|
"""
|
||||||
|
self._preprocessor.disable_normalize()
|
||||||
|
|
||||||
|
def disable_permute(self):
|
||||||
|
"""
|
||||||
|
This function will disable hwc2chw in preprocessing step.
|
||||||
|
"""
|
||||||
|
self._preprocessor.disable_permute()
|
||||||
|
|
||||||
|
|
||||||
class ClassifierPostprocessor:
|
class ClassifierPostprocessor:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -581,6 +616,18 @@ class RecognizerPreprocessor:
|
|||||||
list), "The value to set `rec_image_shape` must be type of list."
|
list), "The value to set `rec_image_shape` must be type of list."
|
||||||
self._preprocessor.rec_image_shape = value
|
self._preprocessor.rec_image_shape = value
|
||||||
|
|
||||||
|
def disable_normalize(self):
|
||||||
|
"""
|
||||||
|
This function will disable normalize in preprocessing step.
|
||||||
|
"""
|
||||||
|
self._preprocessor.disable_normalize()
|
||||||
|
|
||||||
|
def disable_permute(self):
|
||||||
|
"""
|
||||||
|
This function will disable hwc2chw in preprocessing step.
|
||||||
|
"""
|
||||||
|
self._preprocessor.disable_permute()
|
||||||
|
|
||||||
|
|
||||||
class RecognizerPostprocessor:
|
class RecognizerPostprocessor:
|
||||||
def __init__(self, label_path):
|
def __init__(self, label_path):
|
||||||
|
15
tools/rknpu2/config/ppocrv3_cls.yaml
Normal file
15
tools/rknpu2/config/ppocrv3_cls.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
mean:
|
||||||
|
-
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
std:
|
||||||
|
-
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
model_path: ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx
|
||||||
|
outputs_nodes:
|
||||||
|
do_quantization: False
|
||||||
|
dataset:
|
||||||
|
output_folder: "./ch_ppocr_mobile_v2.0_cls_infer"
|
15
tools/rknpu2/config/ppocrv3_det.yaml
Normal file
15
tools/rknpu2/config/ppocrv3_det.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
mean:
|
||||||
|
-
|
||||||
|
- 123.675
|
||||||
|
- 116.28
|
||||||
|
- 103.53
|
||||||
|
std:
|
||||||
|
-
|
||||||
|
- 58.395
|
||||||
|
- 57.12
|
||||||
|
- 57.375
|
||||||
|
model_path: ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx
|
||||||
|
outputs_nodes:
|
||||||
|
do_quantization: False
|
||||||
|
dataset:
|
||||||
|
output_folder: "./ch_PP-OCRv3_det_infer"
|
15
tools/rknpu2/config/ppocrv3_rec.yaml
Normal file
15
tools/rknpu2/config/ppocrv3_rec.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
mean:
|
||||||
|
-
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
std:
|
||||||
|
-
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
- 127.5
|
||||||
|
model_path: ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx
|
||||||
|
outputs_nodes:
|
||||||
|
do_quantization: False
|
||||||
|
dataset:
|
||||||
|
output_folder: "./ch_PP-OCRv3_rec_infer"
|
@@ -65,7 +65,10 @@ if __name__ == "__main__":
|
|||||||
if not os.path.exists(yaml_config["output_folder"]):
|
if not os.path.exists(yaml_config["output_folder"]):
|
||||||
os.mkdir(yaml_config["output_folder"])
|
os.mkdir(yaml_config["output_folder"])
|
||||||
|
|
||||||
model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0]
|
name_list = os.path.basename(yaml_config["model_path"]).split(".")
|
||||||
|
model_base_name = ""
|
||||||
|
for name in name_list[0:-1]:
|
||||||
|
model_base_name += name
|
||||||
model_device_name = config.target_platform.lower()
|
model_device_name = config.target_platform.lower()
|
||||||
if yaml_config["do_quantization"]:
|
if yaml_config["do_quantization"]:
|
||||||
model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn"
|
model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn"
|
||||||
|
Reference in New Issue
Block a user