[Hackathon 182 Model] Update PPOCRV3 For RKNPU2 (#1403)

* update ppocrv3 for rknpu2

* add config

* add config

* detele unuseful

* update useful results

* Repair note

* Repair note

* fixed bugs

* update
This commit is contained in:
Zheng-Bicheng
2023-02-27 15:01:17 +08:00
committed by GitHub
parent b6e8773b2f
commit 8c3ccc2cc2
27 changed files with 958 additions and 132 deletions

View File

@@ -131,9 +131,9 @@ endif()
if(ENABLE_RKNPU2_BACKEND) if(ENABLE_RKNPU2_BACKEND)
if(RKNN2_TARGET_SOC STREQUAL "RK356X") if(RKNN2_TARGET_SOC STREQUAL "RK356X")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so) set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588") elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so) set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/lib/librknnrt.so)
else () else ()
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588") message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588")
endif() endif()

View File

@@ -1,7 +1,7 @@
# get RKNPU2_URL # get RKNPU2_URL
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/") set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(RKNPU2_VERSION "1.4.0") set(RKNPU2_VERSION "1.4.2b0")
set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz") set(RKNPU2_FILE "rknpu2_runtime-linux-aarch64-${RKNPU2_VERSION}-${RKNN2_TARGET_SOC}.tgz")
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}") set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
# download_and_decompress # download_and_decompress
@@ -10,11 +10,12 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE}
# set path # set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime) set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
# include lib
if (EXISTS ${RKNPU_RUNTIME_PATH}) if (EXISTS ${RKNPU_RUNTIME_PATH})
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so) set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include) include_directories(${RKNPU_RUNTIME_PATH}/include)
else () else ()
message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error") message(FATAL_ERROR "[rknpu2.cmake] RKNPU_RUNTIME_PATH does not exist.")
endif () endif ()

View File

@@ -0,0 +1,77 @@
# PaddleOCR 模型部署
## PaddleOCR为多个模型组合串联任务包含如下几个模型构成
* 文本检测 `DBDetector`
* [可选]方向分类 `Classifer` 用于调整进入文字识别前的图像方向
* 文字识别 `Recognizer` 用于从图像中识别出文字
根据不同场景, FastDeploy汇总提供如下OCR任务部署, 用户需同时下载3个模型与字典文件(方向分类器可选, 不使用时为2个模型), 完成OCR整个预测流程
## PP-OCR 中英文系列模型
下表中的模型下载链接由PaddleOCR模型库提供, 详见[PP-OCR系列模型列表](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/doc/doc_ch/models_list.md)
| OCR版本 | 文本框检测 | 方向分类模型 | 文字识别 | 字典文件 | 说明 |
|:-------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:--------------------------------------------------------|
| ch_PP-OCRv3[推荐] | [ch_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv3系列原始超轻量模型支持中英文、多语种文本检测 |
| en_PP-OCRv3[推荐] | [en_PP-OCRv3_det](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [en_PP-OCRv3_rec](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) | [en_dict.txt](https://bj.bcebos.com/paddlehub/fastdeploy/en_dict.txt) | OCRv3系列原始超轻量模型支持英文与数字识别除检测模型和识别模型的训练数据与中文模型不同以外无其他区别 |
| ch_PP-OCRv2 | [ch_PP-OCRv2_det](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_PP-OCRv2_rec](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型支持中英文、多语种文本检测 |
| ch_PP-OCRv2_mobile | [ch_ppocr_mobile_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_mobile_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2系列原始超轻量模型支持中英文、多语种文本检测,比PPOCRv2更加轻量 |
| ch_PP-OCRv2_server | [ch_ppocr_server_v2.0_det](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) | [ch_ppocr_mobile_v2.0_cls](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) | [ch_ppocr_server_v2.0_rec](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) | [ppocr_keys_v1.txt](https://bj.bcebos.com/paddlehub/fastdeploy/ppocr_keys_v1.txt) | OCRv2服务器系列模型, 支持中英文、多语种文本检测,比超轻量模型更大,但效果更好 |
## 模型转换
在RKNPU2上使用PPOCR时我们需要把Paddle静态图模型转为RKNN模型。
### 静态图模型转RKNN格式模型
rknn_toolkit2工具暂不支持直接从Paddle静态图模型转换为RKNN模型因此我们需要先将Paddle静态图模型转为ONNX模型再由ONNX模型转换为RKNN模型。
```bash
# 下载模型和字典文件
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
tar -xvf ch_PP-OCRv3_det_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar -xvf ch_PP-OCRv3_rec_infer.tar
# 转换模型到ONNX格式的模型
paddle2onnx --model_dir ch_PP-OCRv3_det_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--enable_dev_version True
paddle2onnx --model_dir ch_ppocr_mobile_v2.0_cls_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--enable_dev_version True
paddle2onnx --model_dir ch_PP-OCRv3_rec_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--enable_dev_version True
# 固定模型的输入shape
python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--output_model ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--input_shape_dict "{'x':[1,3,960,960]}"
python -m paddle2onnx.optimize --input_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--output_model ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--input_shape_dict "{'x':[1,3,48,192]}"
python -m paddle2onnx.optimize --input_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--output_model ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--input_shape_dict "{'x':[1,3,48,320]}"
# 转换ONNX模型到RKNN模型
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_det.yaml \
--target_platform rk3588
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_rec.yaml \
--target_platform rk3588
python tools/rknpu2/export.py --config_path tools/rknpu2/config/ppocrv3_cls.yaml \
--target_platform rk3588
```

View File

@@ -0,0 +1,14 @@
# cmake_minimum_required() must precede project(): it establishes the policy
# defaults that project() relies on.
cmake_minimum_required(VERSION 3.10)
project(infer_demo C CXX)

# Path of the downloaded and extracted FastDeploy SDK.
# option() is for booleans only; a directory belongs in a PATH cache variable.
set(FASTDEPLOY_INSTALL_DIR "" CACHE PATH "Path of downloaded fastdeploy sdk.")
if(NOT FASTDEPLOY_INSTALL_DIR)
  message(FATAL_ERROR "Please set -DFASTDEPLOY_INSTALL_DIR to the FastDeploy SDK directory.")
endif()
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

# Add FastDeploy header search paths (FastDeploy.cmake exports variables, not
# imported targets, so directory-scope include is the SDK's convention).
include_directories(${FASTDEPLOY_INCS})

add_executable(infer_static_shape_demo ${PROJECT_SOURCE_DIR}/infer_static_shape.cc)
# Link the FastDeploy libraries; PRIVATE since nothing consumes this executable.
target_link_libraries(infer_static_shape_demo PRIVATE ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,55 @@
English | [简体中文](README_CN.md)
# PPOCRv3 C++ Deployment Example
This directory provides examples that `infer_static_shape.cc` fast finishes the deployment of PPOCRv3 on CPU and RKNPU2.
Two steps before deployment
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Taking the CPU inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model.
```
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download model, image, and dictionary files
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# CPU inference
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
./ppocr_keys_v1.txt \
./12.jpg \
0
# RKNPU2 inference
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
./ppocr_keys_v1.txt \
./12.jpg \
1
```
The above command works for Linux or MacOS. For SDK in Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## Other Documents
- [C++ API Reference](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
- [PPOCR Model Description](../../)
- [PPOCRv3 Python Deployment](../python)
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,63 @@
[English](README.md) | 简体中文
# PPOCRv3 C++部署示例
本目录下提供`infer_static_shape.cc`快速完成PPOCRv3在CPU/RKNPU2上部署的示例。
在部署前,需确认你已经成功完成以下两个操作:
* [正确编译FastDeploy SDK](../../../../../../docs/cn/faq/rknpu2/build.md).
* [成功转换模型](../README.md).
在本目录执行如下命令即可完成编译测试支持此模型需保证FastDeploy版本1.0.3以上(x.x.x>1.0.3), RKNN版本在1.4.1b22以上。
```
mkdir build
cd build
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# 下载图片和字典文件
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# 拷贝RKNN模型到build目录
# CPU推理
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
./ppocr_keys_v1.txt \
./12.jpg \
0
# RKNPU推理
./infer_static_shape_demo ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
./ppocr_keys_v1.txt \
./12.jpg \
1
```
运行完成可视化结果如下图所示:
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
结果输出如下:
```text
det boxes: [[276,174],[285,173],[285,178],[276,179]]rec text: rec score:0.000000 cls label: 1 cls score: 0.766602
det boxes: [[43,408],[483,390],[483,431],[44,449]]rec text: 上海斯格威铂尔曼大酒店 rec score:0.888450 cls label: 0 cls score: 1.000000
det boxes: [[186,456],[399,448],[399,480],[186,488]]rec text: 打浦路15号 rec score:0.988769 cls label: 0 cls score: 1.000000
det boxes: [[18,501],[513,485],[514,537],[18,554]]rec text: 绿洲仕格维花园公寓 rec score:0.992730 cls label: 0 cls score: 1.000000
det boxes: [[78,553],[404,541],[404,573],[78,585]]rec text: 打浦路252935号 rec score:0.983545 cls label: 0 cls score: 1.000000
Visualized result saved in ./vis_result.jpg
```
## 其它文档
- [C++ API查阅](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
- [PPOCR 系列模型介绍](../../../README_CN.md)
- [PPOCRv3 Python部署](../python)
- [模型预测结果说明](../../../../../../docs/api/vision_results/)

View File

@@ -0,0 +1,126 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
// Builds a PP-OCRv3 pipeline (text detector + direction classifier + text
// recognizer) from the given model files, runs it on one image, prints the
// OCR result, and writes the visualization to ./vis_result.jpg.
//
// det_model_file / cls_model_file / rec_model_file: model files (ONNX or RKNN).
// rec_label_file: character dictionary for the recognizer.
// image_file: path of the input image.
// option: runtime option, copied into a per-model option for each model.
// format: model format; controls preprocessing (see RKNN branch below).
void InitAndInfer(const std::string &det_model_file,
                  const std::string &cls_model_file,
                  const std::string &rec_model_file,
                  const std::string &rec_label_file,
                  const std::string &image_file,
                  const fastdeploy::RuntimeOption &option,
                  const fastdeploy::ModelFormat &format) {
  // ONNX/RKNN models are single-file, so the separate params files stay empty.
  auto det_params_file = "";
  auto cls_params_file = "";
  auto rec_params_file = "";

  // Each model gets its own copy of the runtime option.
  auto det_option = option;
  auto cls_option = option;
  auto rec_option = option;

  if (format == fastdeploy::ONNX) {
    std::cout << "ONNX Model" << std::endl;
  }

  auto det_model = fastdeploy::vision::ocr::DBDetector(
      det_model_file, det_params_file, det_option, format);
  auto cls_model = fastdeploy::vision::ocr::Classifier(
      cls_model_file, cls_params_file, cls_option, format);
  auto rec_model = fastdeploy::vision::ocr::Recognizer(
      rec_model_file, rec_params_file, rec_label_file, rec_option, format);

  if (format == fastdeploy::RKNN) {
    // NOTE(review): normalize/permute are skipped for RKNN — presumably these
    // steps were folded into the model during RKNN conversion; confirm against
    // the tools/rknpu2 export config.
    cls_model.GetPreprocessor().DisableNormalize();
    cls_model.GetPreprocessor().DisablePermute();
    det_model.GetPreprocessor().DisableNormalize();
    det_model.GetPreprocessor().DisablePermute();
    rec_model.GetPreprocessor().DisableNormalize();
    rec_model.GetPreprocessor().DisablePermute();
  }

  // Enable static-shape inference on the det/rec preprocessors.
  det_model.GetPreprocessor().SetStaticShapeInfer(true);
  rec_model.GetPreprocessor().SetStaticShapeInfer(true);

  assert(det_model.Initialized());
  assert(cls_model.Initialized());
  assert(rec_model.Initialized());

  // The classification model is optional, so the PP-OCR can also be connected
  // in series as follows: auto ppocr_v3 =
  // fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
  auto ppocr_v3 =
      fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);

  // When users enable static shape infer for rec model, the batch size of cls
  // and rec model must to be set to 1.
  ppocr_v3.SetClsBatchSize(1);
  ppocr_v3.SetRecBatchSize(1);

  if (!ppocr_v3.Initialized()) {
    std::cerr << "Failed to initialize PP-OCR." << std::endl;
    return;
  }

  auto im = cv::imread(image_file);

  fastdeploy::vision::OCRResult result;
  if (!ppocr_v3.Predict(im, &result)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  // Print the textual OCR result, then save the visualized image.
  std::cout << result.Str() << std::endl;

  auto vis_im = fastdeploy::vision::VisOcr(im, result);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
// Entry point.
// argv: det_model cls_model rec_model rec_label_file image run_option
// run_option: 0 = CPU inference with ONNX models,
//             1 = RKNPU2 inference with RKNN models.
int main(int argc, char *argv[]) {
  if (argc < 7) {
    std::cout << "Usage: infer_demo path/to/det_model path/to/cls_model "
                 "path/to/rec_model path/to/rec_label_file path/to/image "
                 "run_option, "
                 "e.g ./infer_demo ./ch_PP-OCRv3_det_infer "
                 "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
                 "./ppocr_keys_v1.txt ./12.jpg 0"
              << std::endl;
    // Fix: the message previously said "run with ascend", but flag 1 selects
    // the RKNPU2 backend below.
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
                 "with RKNPU2."
              << std::endl;
    return -1;
  }

  fastdeploy::RuntimeOption option;
  fastdeploy::ModelFormat format;
  int flag = std::atoi(argv[6]);

  if (flag == 0) {
    option.UseCpu();
    format = fastdeploy::ONNX;
  } else if (flag == 1) {
    option.UseRKNPU2();
    format = fastdeploy::RKNN;
  } else {
    // Fix: previously any other value left `format` uninitialized and fell
    // through to inference with undefined behavior.
    std::cerr << "Invalid run_option: " << flag << ", expected 0 or 1."
              << std::endl;
    return -1;
  }

  std::string det_model_dir = argv[1];
  std::string cls_model_dir = argv[2];
  std::string rec_model_dir = argv[3];
  std::string rec_label_file = argv[4];
  std::string test_image = argv[5];
  InitAndInfer(det_model_dir, cls_model_dir, rec_model_dir, rec_label_file,
               test_image, option, format);
  return 0;
}

View File

@@ -0,0 +1,49 @@
English | [简体中文](README_CN.md)
# PPOCRv3 Python Deployment Example
Two steps before deployment
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
This directory provides examples that `infer.py` fast finishes the deployment of PPOCRv3 on CPU/GPU and GPU accelerated by TensorRT. The script is as follows
```
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
# Download the example code for deployment
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/ocr/PP-OCRv3/python/
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--rec_label_file ./ppocr_keys_v1.txt \
--image 12.jpg \
--device cpu
# NPU inference
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
--rec_label_file ppocr_keys_v1.txt \
--image 12.jpg \
--device npu
```
The visualized result after running is as follows
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## Other Documents
- [Python API reference](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
- [PPOCR Model Description](../../)
- [PPOCRv3 C++ Deployment](../cpp)
- [Model Prediction Results](../../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../../docs/en/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,62 @@
[English](README.md) | 简体中文
# PPOCRv3 Python部署示例
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. FastDeploy Python whl包安装参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
本目录下提供`infer.py`快速完成PPOCRv3在CPU/GPU以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成
```
# 下载模型,图片和字典文件
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
tar xvf ch_PP-OCRv3_det_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar xvf ch_PP-OCRv3_rec_infer.tar
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
#下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/ocr/PP-OCRv3/python/
# CPU推理
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx \
--rec_label_file ./ppocr_keys_v1.txt \
--image 12.jpg \
--device cpu
# NPU推理
python3 infer_static_shape.py \
--det_model ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer_rk3588_unquantized.rknn \
--cls_model ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v20_cls_infer_rk3588_unquantized.rknn \
--rec_model ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer_rk3588_unquantized.rknn \
--rec_label_file ppocr_keys_v1.txt \
--image 12.jpg \
--device npu
```
运行完成可视化结果如下图所示
<img width="640" src="https://user-images.githubusercontent.com/109218879/185826024-f7593a0c-1bd2-4a60-b76c-15588484fa08.jpg">
## 其它文档
- [Python API文档查阅](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
- [PPOCR 系列模型介绍](../../)
- [PPOCRv3 C++部署](../cpp)
- [模型预测结果说明](../../../../../../docs/api/vision_results/)
- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,144 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import cv2
import os
def parse_arguments():
    """Parse command-line arguments for the PP-OCRv3 static-shape demo.

    Returns:
        argparse.Namespace with det/cls/rec model paths, the recognition
        label file, the test image path, the inference device ('cpu' or
        'npu'), and the CPU thread count.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--det_model", required=True, help="Path of Detection model of PPOCR.")
    parser.add_argument(
        "--cls_model",
        required=True,
        help="Path of Classification model of PPOCR.")
    parser.add_argument(
        "--rec_model",
        required=True,
        help="Path of Recognition model of PPOCR.")
    parser.add_argument(
        "--rec_label_file",
        required=True,
        # Fix: help text previously said "Recognition model" (copy-paste).
        help="Path of Recognition label file of PPOCR.")
    parser.add_argument(
        "--image", type=str, required=True, help="Path of test image file.")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        # Fix: only 'cpu' and 'npu' are handled by this script; the old help
        # text advertised 'kunlunxin' and 'gpu'.
        help="Type of inference device, support 'cpu' or 'npu'.")
    parser.add_argument(
        "--cpu_thread_num",
        type=int,
        default=9,
        help="Number of threads while inference on CPU.")
    return parser.parse_args()
def build_option(args):
    """Create one RuntimeOption per model (det, cls, rec).

    All three default to CPU; when the requested device is "npu" they are
    all switched to the RKNPU2 backend.

    Returns:
        Tuple of (det_option, cls_option, rec_option).
    """
    options = [fd.RuntimeOption() for _ in range(3)]
    if args.device == "npu":
        for runtime_option in options:
            runtime_option.use_rknpu2()
    return tuple(options)
def build_format(args):
    """Select the model format for each of det/cls/rec.

    Returns:
        Tuple of three fd.ModelFormat values: RKNN when the device is
        "npu", ONNX otherwise.
    """
    chosen = fd.ModelFormat.RKNN if args.device == "npu" else fd.ModelFormat.ONNX
    return chosen, chosen, chosen
args = parse_arguments()

# Detection model: locates text boxes.
det_model_file = args.det_model
det_params_file = ""
# Classification model: text-direction classifier (optional).
cls_model_file = args.cls_model
cls_params_file = ""
# Recognition model: recognizes the text inside each box.
rec_model_file = args.rec_model
rec_params_file = ""
rec_label_file = args.rec_label_file

det_option, cls_option, rec_option = build_option(args)
det_format, cls_format, rec_format = build_format(args)

det_model = fd.vision.ocr.DBDetector(
    det_model_file,
    det_params_file,
    runtime_option=det_option,
    model_format=det_format)
cls_model = fd.vision.ocr.Classifier(
    cls_model_file,
    cls_params_file,
    runtime_option=cls_option,
    model_format=cls_format)
rec_model = fd.vision.ocr.Recognizer(
    rec_model_file,
    rec_params_file,
    rec_label_file,
    runtime_option=rec_option,
    model_format=rec_format)

# Enable static-shape inference for the Det and Rec models.
det_model.preprocessor.static_shape_infer = True
rec_model.preprocessor.static_shape_infer = True

if args.device == "npu":
    # NOTE(review): normalize/permute are skipped on NPU — presumably these
    # steps were folded into the RKNN models at conversion time; confirm
    # against the rknpu2 export config.
    det_model.preprocessor.disable_normalize()
    det_model.preprocessor.disable_permute()
    cls_model.preprocessor.disable_normalize()
    cls_model.preprocessor.disable_permute()
    rec_model.preprocessor.disable_normalize()
    rec_model.preprocessor.disable_permute()

# Build the PP-OCR pipeline chaining the 3 models; cls_model is optional and
# may be set to None if direction classification is not needed.
ppocr_v3 = fd.vision.ocr.PPOCRv3(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)

# With static-shape inference enabled, the Cls and Rec batch sizes must be 1.
ppocr_v3.cls_batch_size = 1
ppocr_v3.rec_batch_size = 1

# Load the test image.
im = cv2.imread(args.image)

# Run prediction and print the result.
result = ppocr_v3.predict(im)

print(result)

# Visualize and save the result image.
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")

35
fastdeploy/vision/ocr/ppocr/classifier.cc Executable file → Normal file
View File

@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/classifier.h" #include "fastdeploy/vision/ocr/ppocr/classifier.h"
#include "fastdeploy/utils/perf.h" #include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,15 +27,16 @@ Classifier::Classifier(const std::string& model_file,
const RuntimeOption& custom_option, const RuntimeOption& custom_option,
const ModelFormat& model_format) { const ModelFormat& model_format) {
if (model_format == ModelFormat::ONNX) { if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT, valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
Backend::OPENVINO}; valid_gpu_backends = {Backend::ORT, Backend::TRT};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else { } else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE}; valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU}; valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
} }
runtime_option = custom_option; runtime_option = custom_option;
runtime_option.model_format = model_format; runtime_option.model_format = model_format;
@@ -54,16 +56,18 @@ bool Classifier::Initialize() {
} }
std::unique_ptr<Classifier> Classifier::Clone() const { std::unique_ptr<Classifier> Classifier::Clone() const {
std::unique_ptr<Classifier> clone_model = utils::make_unique<Classifier>(Classifier(*this)); std::unique_ptr<Classifier> clone_model =
utils::make_unique<Classifier>(Classifier(*this));
clone_model->SetRuntime(clone_model->CloneRuntime()); clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model; return clone_model;
} }
bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_score) { bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label,
float* cls_score) {
std::vector<int32_t> cls_labels(1); std::vector<int32_t> cls_labels(1);
std::vector<float> cls_scores(1); std::vector<float> cls_scores(1);
bool success = BatchPredict({img}, &cls_labels, &cls_scores); bool success = BatchPredict({img}, &cls_labels, &cls_scores);
if(!success){ if (!success) {
return success; return success;
} }
*cls_label = cls_labels[0]; *cls_label = cls_labels[0];
@@ -72,16 +76,19 @@ bool Classifier::Predict(const cv::Mat& img, int32_t* cls_label, float* cls_scor
} }
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images, bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<int32_t>* cls_labels, std::vector<float>* cls_scores) { std::vector<int32_t>* cls_labels,
std::vector<float>* cls_scores) {
return BatchPredict(images, cls_labels, cls_scores, 0, images.size()); return BatchPredict(images, cls_labels, cls_scores, 0, images.size());
} }
bool Classifier::BatchPredict(const std::vector<cv::Mat>& images, bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<int32_t>* cls_labels, std::vector<float>* cls_scores, std::vector<int32_t>* cls_labels,
std::vector<float>* cls_scores,
size_t start_index, size_t end_index) { size_t start_index, size_t end_index) {
size_t total_size = images.size(); size_t total_size = images.size();
std::vector<FDMat> fd_images = WrapMat(images); std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index)) { if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
end_index)) {
FDERROR << "Failed to preprocess the input image." << std::endl; FDERROR << "Failed to preprocess the input image." << std::endl;
return false; return false;
} }
@@ -91,13 +98,15 @@ bool Classifier::BatchPredict(const std::vector<cv::Mat>& images,
return false; return false;
} }
if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores, start_index, total_size)) { if (!postprocessor_.Run(reused_output_tensors_, cls_labels, cls_scores,
FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl; start_index, total_size)) {
FDERROR << "Failed to postprocess the inference cls_results by runtime."
<< std::endl;
return false; return false;
} }
return true; return true;
} }
} // namesapce ocr } // namespace ocr
} // namespace vision } // namespace vision
} // namespace fastdeploy } // namespace fastdeploy

30
fastdeploy/vision/ocr/ppocr/cls_preprocessor.cc Executable file → Normal file
View File

@@ -13,9 +13,10 @@
// limitations under the License. // limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h" #include "fastdeploy/vision/ocr/ppocr/cls_preprocessor.h"
#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h" #include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
#include "fastdeploy/function/concat.h"
namespace fastdeploy { namespace fastdeploy {
namespace vision { namespace vision {
@@ -38,34 +39,43 @@ void OcrClassifierResizeImage(FDMat* mat,
Resize::Run(mat, resize_w, img_h); Resize::Run(mat, resize_w, img_h);
} }
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) { bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) {
return Run(images, outputs, 0, images->size()); return Run(images, outputs, 0, images->size());
} }
bool ClassifierPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, bool ClassifierPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs,
size_t start_index, size_t end_index) { size_t start_index, size_t end_index) {
if (images->size() == 0 || start_index < 0 || end_index <= start_index ||
if (images->size() == 0 || start_index <0 || end_index <= start_index || end_index > images->size()) { end_index > images->size()) {
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl; FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
"end_index <= images->size()"
<< std::endl;
return false; return false;
} }
for (size_t i = start_index; i < end_index; ++i) { for (size_t i = start_index; i < end_index; ++i) {
FDMat* mat = &(images->at(i)); FDMat* mat = &(images->at(i));
OcrClassifierResizeImage(mat, cls_image_shape_); OcrClassifierResizeImage(mat, cls_image_shape_);
Normalize::Run(mat, mean_, scale_, is_scale_); if (!disable_normalize_) {
Normalize::Run(mat, mean_, scale_, is_scale_);
}
std::vector<float> value = {0, 0, 0}; std::vector<float> value = {0, 0, 0};
if (mat->Width() < cls_image_shape_[2]) { if (mat->Width() < cls_image_shape_[2]) {
Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value); Pad::Run(mat, 0, 0, 0, cls_image_shape_[2] - mat->Width(), value);
} }
HWC2CHW::Run(mat);
Cast::Run(mat, "float"); if (!disable_permute_) {
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
}
} }
// Only have 1 output Tensor. // Only have 1 output Tensor.
outputs->resize(1); outputs->resize(1);
// Concat all the preprocessed data to a batch tensor // Concat all the preprocessed data to a batch tensor
size_t tensor_size = end_index - start_index; size_t tensor_size = end_index - start_index;
std::vector<FDTensor> tensors(tensor_size); std::vector<FDTensor> tensors(tensor_size);
for (size_t i = 0; i < tensor_size; ++i) { for (size_t i = 0; i < tensor_size; ++i) {
(*images)[i + start_index].ShareWithTensor(&(tensors[i])); (*images)[i + start_index].ShareWithTensor(&(tensors[i]));
tensors[i].ExpandDim(0); tensors[i].ExpandDim(0);

View File

@@ -56,7 +56,16 @@ class FASTDEPLOY_DECL ClassifierPreprocessor {
/// Get cls_image_shape for the classification preprocess /// Get cls_image_shape for the classification preprocess
std::vector<int> GetClsImageShape() const { return cls_image_shape_; } std::vector<int> GetClsImageShape() const { return cls_image_shape_; }
/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_permute_ = true; }
private: private:
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f}; std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f}; std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
bool is_scale_ = true; bool is_scale_ = true;

View File

@@ -36,6 +36,7 @@ DBDetector::DBDetector(const std::string& model_file,
valid_kunlunxin_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU}; valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
} }
runtime_option = custom_option; runtime_option = custom_option;

View File

@@ -20,9 +20,13 @@ namespace fastdeploy {
namespace vision { namespace vision {
namespace ocr { namespace ocr {
std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len) { std::array<int, 4> DBDetectorPreprocessor::OcrDetectorGetInfo(
FDMat* img, int max_size_len) {
int w = img->Width(); int w = img->Width();
int h = img->Height(); int h = img->Height();
if (static_shape_infer_) {
return {w, h, det_image_shape_[2], det_image_shape_[1]};
}
float ratio = 1.f; float ratio = 1.f;
int max_wh = w >= h ? w : h; int max_wh = w >= h ? w : h;
@@ -86,7 +90,10 @@ bool DBDetectorPreprocessor::Apply(FDMatBatch* image_batch,
ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3], ResizeImage(mat, batch_det_img_info_[i][2], batch_det_img_info_[i][3],
max_resize_w, max_resize_h); max_resize_w, max_resize_h);
} }
(*normalize_permute_op_)(image_batch);
if (!disable_normalize_ && !disable_permute_) {
(*normalize_permute_op_)(image_batch);
}
outputs->resize(1); outputs->resize(1);
FDTensor* tensor = image_batch->Tensor(); FDTensor* tensor = image_batch->Tensor();

View File

@@ -59,14 +59,44 @@ class FASTDEPLOY_DECL DBDetectorPreprocessor : public ProcessorManager {
return &batch_det_img_info_; return &batch_det_img_info_;
} }
/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_permute_ = true; }
/// Set det_image_shape for the detection preprocess.
/// This api is usually used when you retrain the model.
/// Generally, you do not need to use it.
void SetDetImageShape(const std::vector<int>& det_image_shape) {
det_image_shape_ = det_image_shape;
}
/// Get det_image_shape for the detection preprocess
std::vector<int> GetDetImageShape() const { return det_image_shape_; }
/// Set whether static_shape_infer is enabled. When deploying PP-OCR
/// on hardware which cannot support dynamic input shapes very well,
/// like Huawei Ascend, static_shape_infer needs to be true.
void SetStaticShapeInfer(bool static_shape_infer) {
static_shape_infer_ = static_shape_infer;
}
/// Get static_shape_infer of the detection preprocess
bool GetStaticShapeInfer() const { return static_shape_infer_; } bool GetStaticShapeInfer() const { return static_shape_infer_; }
private: private:
bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w, bool ResizeImage(FDMat* img, int resize_w, int resize_h, int max_resize_w,
int max_resize_h); int max_resize_h);
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
int max_side_len_ = 960; int max_side_len_ = 960;
std::vector<std::array<int, 4>> batch_det_img_info_; std::vector<std::array<int, 4>> batch_det_img_info_;
std::shared_ptr<Resize> resize_op_; std::shared_ptr<Resize> resize_op_;
std::shared_ptr<Pad> pad_op_; std::shared_ptr<Pad> pad_op_;
std::shared_ptr<NormalizeAndPermute> normalize_permute_op_; std::shared_ptr<NormalizeAndPermute> normalize_permute_op_;
std::vector<int> det_image_shape_ = {3, 960, 960};
bool static_shape_infer_ = false;
std::array<int, 4> OcrDetectorGetInfo(FDMat* img, int max_size_len);
}; };
} // namespace ocr } // namespace ocr

View File

@@ -26,6 +26,9 @@ void BindPPOCRModel(pybind11::module& m) {
pybind11::class_<vision::ocr::DBDetectorPreprocessor>( pybind11::class_<vision::ocr::DBDetectorPreprocessor>(
m, "DBDetectorPreprocessor") m, "DBDetectorPreprocessor")
.def(pybind11::init<>()) .def(pybind11::init<>())
.def_property("static_shape_infer",
&vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer,
&vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer)
.def_property("max_side_len", .def_property("max_side_len",
&vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
&vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
@@ -33,19 +36,27 @@ void BindPPOCRModel(pybind11::module& m) {
[](vision::ocr::DBDetectorPreprocessor& self, [](vision::ocr::DBDetectorPreprocessor& self,
const std::vector<float>& mean, const std::vector<float>& std, const std::vector<float>& mean, const std::vector<float>& std,
bool is_scale) { self.SetNormalize(mean, std, is_scale); }) bool is_scale) { self.SetNormalize(mean, std, is_scale); })
.def("run", [](vision::ocr::DBDetectorPreprocessor& self, .def("run",
std::vector<pybind11::array>& im_list) { [](vision::ocr::DBDetectorPreprocessor& self,
std::vector<vision::FDMat> images; std::vector<pybind11::array>& im_list) {
for (size_t i = 0; i < im_list.size(); ++i) { std::vector<vision::FDMat> images;
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); for (size_t i = 0; i < im_list.size(); ++i) {
} images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
std::vector<FDTensor> outputs; }
self.Run(&images, &outputs); std::vector<FDTensor> outputs;
auto batch_det_img_info = self.GetBatchImgInfo(); self.Run(&images, &outputs);
for (size_t i = 0; i < outputs.size(); ++i) { auto batch_det_img_info = self.GetBatchImgInfo();
outputs[i].StopSharing(); for (size_t i = 0; i < outputs.size(); ++i) {
} outputs[i].StopSharing();
return std::make_pair(outputs, *batch_det_img_info); }
return std::make_pair(outputs, *batch_det_img_info);
})
.def("disable_normalize",
[](vision::ocr::DBDetectorPreprocessor& self) {
self.DisableNormalize();
})
.def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) {
self.DisablePermute();
}); });
pybind11::class_<vision::ocr::DBDetectorPostprocessor>( pybind11::class_<vision::ocr::DBDetectorPostprocessor>(
@@ -135,21 +146,30 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("is_scale", .def_property("is_scale",
&vision::ocr::ClassifierPreprocessor::GetIsScale, &vision::ocr::ClassifierPreprocessor::GetIsScale,
&vision::ocr::ClassifierPreprocessor::SetIsScale) &vision::ocr::ClassifierPreprocessor::SetIsScale)
.def("run", [](vision::ocr::ClassifierPreprocessor& self, .def("run",
std::vector<pybind11::array>& im_list) { [](vision::ocr::ClassifierPreprocessor& self,
std::vector<vision::FDMat> images; std::vector<pybind11::array>& im_list) {
for (size_t i = 0; i < im_list.size(); ++i) { std::vector<vision::FDMat> images;
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); for (size_t i = 0; i < im_list.size(); ++i) {
} images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
std::vector<FDTensor> outputs; }
if (!self.Run(&images, &outputs)) { std::vector<FDTensor> outputs;
throw std::runtime_error( if (!self.Run(&images, &outputs)) {
"Failed to preprocess the input data in ClassifierPreprocessor."); throw std::runtime_error(
} "Failed to preprocess the input data in "
for (size_t i = 0; i < outputs.size(); ++i) { "ClassifierPreprocessor.");
outputs[i].StopSharing(); }
} for (size_t i = 0; i < outputs.size(); ++i) {
return outputs; outputs[i].StopSharing();
}
return outputs;
})
.def("disable_normalize",
[](vision::ocr::ClassifierPreprocessor& self) {
self.DisableNormalize();
})
.def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) {
self.DisablePermute();
}); });
pybind11::class_<vision::ocr::ClassifierPostprocessor>( pybind11::class_<vision::ocr::ClassifierPostprocessor>(
@@ -229,21 +249,30 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("is_scale", .def_property("is_scale",
&vision::ocr::RecognizerPreprocessor::GetIsScale, &vision::ocr::RecognizerPreprocessor::GetIsScale,
&vision::ocr::RecognizerPreprocessor::SetIsScale) &vision::ocr::RecognizerPreprocessor::SetIsScale)
.def("run", [](vision::ocr::RecognizerPreprocessor& self, .def("run",
std::vector<pybind11::array>& im_list) { [](vision::ocr::RecognizerPreprocessor& self,
std::vector<vision::FDMat> images; std::vector<pybind11::array>& im_list) {
for (size_t i = 0; i < im_list.size(); ++i) { std::vector<vision::FDMat> images;
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); for (size_t i = 0; i < im_list.size(); ++i) {
} images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
std::vector<FDTensor> outputs; }
if (!self.Run(&images, &outputs)) { std::vector<FDTensor> outputs;
throw std::runtime_error( if (!self.Run(&images, &outputs)) {
"Failed to preprocess the input data in RecognizerPreprocessor."); throw std::runtime_error(
} "Failed to preprocess the input data in "
for (size_t i = 0; i < outputs.size(); ++i) { "RecognizerPreprocessor.");
outputs[i].StopSharing(); }
} for (size_t i = 0; i < outputs.size(); ++i) {
return outputs; outputs[i].StopSharing();
}
return outputs;
})
.def("disable_normalize",
[](vision::ocr::RecognizerPreprocessor& self) {
self.DisableNormalize();
})
.def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) {
self.DisablePermute();
}); });
pybind11::class_<vision::ocr::RecognizerPostprocessor>( pybind11::class_<vision::ocr::RecognizerPostprocessor>(

View File

@@ -13,22 +13,23 @@
// limitations under the License. // limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h" #include "fastdeploy/vision/ocr/ppocr/rec_preprocessor.h"
#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h" #include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
#include "fastdeploy/function/concat.h"
namespace fastdeploy { namespace fastdeploy {
namespace vision { namespace vision {
namespace ocr { namespace ocr {
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio, void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
const std::vector<int>& rec_image_shape, bool static_shape_infer) { const std::vector<int>& rec_image_shape,
bool static_shape_infer) {
int img_h, img_w; int img_h, img_w;
img_h = rec_image_shape[1]; img_h = rec_image_shape[1];
img_w = rec_image_shape[2]; img_w = rec_image_shape[2];
if (!static_shape_infer) { if (!static_shape_infer) {
img_w = int(img_h * max_wh_ratio); img_w = int(img_h * max_wh_ratio);
float ratio = float(mat->Width()) / float(mat->Height()); float ratio = float(mat->Width()) / float(mat->Height());
@@ -43,23 +44,29 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
} else { } else {
if (mat->Width() >= img_w) { if (mat->Width() >= img_w) {
Resize::Run(mat, img_w, img_h); // Resize W to 320 Resize::Run(mat, img_w, img_h); // Resize W to 320
} else { } else {
Resize::Run(mat, mat->Width(), img_h); Resize::Run(mat, mat->Width(), img_h);
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127}); Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
// Pad to 320 // Pad to 320
} }
} }
} }
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) { bool RecognizerPreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs) {
return Run(images, outputs, 0, images->size(), {}); return Run(images, outputs, 0, images->size(), {});
} }
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs, bool RecognizerPreprocessor::Run(std::vector<FDMat>* images,
size_t start_index, size_t end_index, const std::vector<int>& indices) { std::vector<FDTensor>* outputs,
if (images->size() == 0 || end_index <= start_index || end_index > images->size()) { size_t start_index, size_t end_index,
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < end_index <= images->size()" << std::endl; const std::vector<int>& indices) {
if (images->size() == 0 || end_index <= start_index ||
end_index > images->size()) {
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
"end_index <= images->size()"
<< std::endl;
return false; return false;
} }
@@ -67,7 +74,7 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
int img_w = rec_image_shape_[2]; int img_w = rec_image_shape_[2];
float max_wh_ratio = img_w * 1.0 / img_h; float max_wh_ratio = img_w * 1.0 / img_h;
float ori_wh_ratio; float ori_wh_ratio;
for (size_t i = start_index; i < end_index; ++i) { for (size_t i = start_index; i < end_index; ++i) {
size_t real_index = i; size_t real_index = i;
if (indices.size() != 0) { if (indices.size() != 0) {
@@ -84,20 +91,31 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
real_index = indices[i]; real_index = indices[i];
} }
FDMat* mat = &(images->at(real_index)); FDMat* mat = &(images->at(real_index));
OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_infer_); OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_,
NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_); static_shape_infer_);
if (!disable_normalize_ && !disable_permute_) {
NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
} else {
if (!disable_normalize_) {
Normalize::Run(mat, mean_, scale_, is_scale_);
}
if (!disable_permute_) {
HWC2CHW::Run(mat);
Cast::Run(mat, "float");
}
}
} }
// Only have 1 output Tensor. // Only have 1 output Tensor.
outputs->resize(1); outputs->resize(1);
size_t tensor_size = end_index-start_index; size_t tensor_size = end_index - start_index;
// Concat all the preprocessed data to a batch tensor // Concat all the preprocessed data to a batch tensor
std::vector<FDTensor> tensors(tensor_size); std::vector<FDTensor> tensors(tensor_size);
for (size_t i = 0; i < tensor_size; ++i) { for (size_t i = 0; i < tensor_size; ++i) {
size_t real_index = i + start_index; size_t real_index = i + start_index;
if (indices.size() != 0) { if (indices.size() != 0) {
real_index = indices[i + start_index]; real_index = indices[i + start_index];
} }
(*images)[real_index].ShareWithTensor(&(tensors[i])); (*images)[real_index].ShareWithTensor(&(tensors[i]));
tensors[i].ExpandDim(0); tensors[i].ExpandDim(0);
} }

View File

@@ -66,7 +66,16 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
/// Get rec_image_shape for the recognition preprocess /// Get rec_image_shape for the recognition preprocess
std::vector<int> GetRecImageShape() { return rec_image_shape_; } std::vector<int> GetRecImageShape() { return rec_image_shape_; }
/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_permute_ = true; }
private: private:
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
std::vector<int> rec_image_shape_ = {3, 48, 320}; std::vector<int> rec_image_shape_ = {3, 48, 320};
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f}; std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f}; std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};

41
fastdeploy/vision/ocr/ppocr/recognizer.cc Executable file → Normal file
View File

@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/recognizer.h" #include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/utils/perf.h" #include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
@@ -26,17 +27,19 @@ Recognizer::Recognizer(const std::string& model_file,
const std::string& params_file, const std::string& params_file,
const std::string& label_path, const std::string& label_path,
const RuntimeOption& custom_option, const RuntimeOption& custom_option,
const ModelFormat& model_format):postprocessor_(label_path) { const ModelFormat& model_format)
: postprocessor_(label_path) {
if (model_format == ModelFormat::ONNX) { if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT, valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
Backend::OPENVINO}; valid_gpu_backends = {Backend::ORT, Backend::TRT};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else { } else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE}; valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU}; valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
} }
runtime_option = custom_option; runtime_option = custom_option;
@@ -57,12 +60,14 @@ bool Recognizer::Initialize() {
} }
std::unique_ptr<Recognizer> Recognizer::Clone() const { std::unique_ptr<Recognizer> Recognizer::Clone() const {
std::unique_ptr<Recognizer> clone_model = utils::make_unique<Recognizer>(Recognizer(*this)); std::unique_ptr<Recognizer> clone_model =
utils::make_unique<Recognizer>(Recognizer(*this));
clone_model->SetRuntime(clone_model->CloneRuntime()); clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model; return clone_model;
} }
bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score) { bool Recognizer::Predict(const cv::Mat& img, std::string* text,
float* rec_score) {
std::vector<std::string> texts(1); std::vector<std::string> texts(1);
std::vector<float> rec_scores(1); std::vector<float> rec_scores(1);
bool success = BatchPredict({img}, &texts, &rec_scores); bool success = BatchPredict({img}, &texts, &rec_scores);
@@ -75,20 +80,24 @@ bool Recognizer::Predict(const cv::Mat& img, std::string* text, float* rec_score
} }
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images, bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::string>* texts, std::vector<float>* rec_scores) { std::vector<std::string>* texts,
std::vector<float>* rec_scores) {
return BatchPredict(images, texts, rec_scores, 0, images.size(), {}); return BatchPredict(images, texts, rec_scores, 0, images.size(), {});
} }
bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images, bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::string>* texts, std::vector<float>* rec_scores, std::vector<std::string>* texts,
size_t start_index, size_t end_index, const std::vector<int>& indices) { std::vector<float>* rec_scores,
size_t start_index, size_t end_index,
const std::vector<int>& indices) {
size_t total_size = images.size(); size_t total_size = images.size();
if (indices.size() != 0 && indices.size() != total_size) { if (indices.size() != 0 && indices.size() != total_size) {
FDERROR << "indices.size() should be 0 or images.size()." << std::endl; FDERROR << "indices.size() should be 0 or images.size()." << std::endl;
return false; return false;
} }
std::vector<FDMat> fd_images = WrapMat(images); std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index, end_index, indices)) { if (!preprocessor_.Run(&fd_images, &reused_input_tensors_, start_index,
end_index, indices)) {
FDERROR << "Failed to preprocess the input image." << std::endl; FDERROR << "Failed to preprocess the input image." << std::endl;
return false; return false;
} }
@@ -99,13 +108,15 @@ bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
return false; return false;
} }
if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores, start_index, total_size, indices)) { if (!postprocessor_.Run(reused_output_tensors_, texts, rec_scores,
FDERROR << "Failed to postprocess the inference cls_results by runtime." << std::endl; start_index, total_size, indices)) {
FDERROR << "Failed to postprocess the inference cls_results by runtime."
<< std::endl;
return false; return false;
} }
return true; return true;
} }
} // namesapce ocr } // namespace ocr
} // namespace vision } // namespace vision
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -17,10 +17,14 @@
namespace fastdeploy { namespace fastdeploy {
namespace vision { namespace vision {
cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
const float score_threshold) {
auto vis_im = im.clone(); auto vis_im = im.clone();
for (int n = 0; n < ocr_result.boxes.size(); n++) { for (int n = 0; n < ocr_result.boxes.size(); n++) {
if (ocr_result.rec_scores[n] < score_threshold) {
continue;
}
cv::Point rook_points[4]; cv::Point rook_points[4];
for (int m = 0; m < 4; m++) { for (int m = 0; m < 4; m++) {
@@ -28,7 +32,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
int(ocr_result.boxes[n][m * 2 + 1])); int(ocr_result.boxes[n][m * 2 + 1]));
} }
const cv::Point *ppt[1] = {rook_points}; const cv::Point* ppt[1] = {rook_points};
int npt[] = {4}; int npt[] = {4};
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
} }
@@ -36,7 +40,7 @@ cv::Mat VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
return vis_im; return vis_im;
} }
cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) { cv::Mat Visualize::VisOcr(const cv::Mat& im, const OCRResult& ocr_result) {
FDWARNING FDWARNING
<< "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, " << "DEPRECATED: fastdeploy::vision::Visualize::VisOcr is deprecated, "
"please use fastdeploy::vision:VisOcr function instead." "please use fastdeploy::vision:VisOcr function instead."
@@ -51,7 +55,7 @@ cv::Mat Visualize::VisOcr(const cv::Mat &im, const OCRResult &ocr_result) {
int(ocr_result.boxes[n][m * 2 + 1])); int(ocr_result.boxes[n][m * 2 + 1]));
} }
const cv::Point *ppt[1] = {rook_points}; const cv::Point* ppt[1] = {rook_points};
int npt[] = {4}; int npt[] = {4};
cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0); cv::polylines(vis_im, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
} }

View File

@@ -15,8 +15,8 @@
#pragma once #pragma once
#include "fastdeploy/vision/common/result.h" #include "fastdeploy/vision/common/result.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "fastdeploy/vision/tracking/pptracking/model.h" #include "fastdeploy/vision/tracking/pptracking/model.h"
#include "opencv2/imgproc/imgproc.hpp"
namespace fastdeploy { namespace fastdeploy {
/** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace /** \brief All C++ FastDeploy Vision Models APIs are defined inside this namespace
@@ -41,9 +41,10 @@ class FASTDEPLOY_DECL Visualize {
bool remove_small_connected_area = false); bool remove_small_connected_area = false);
static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred, static cv::Mat RemoveSmallConnectedArea(const cv::Mat& alpha_pred,
float threshold); float threshold);
static cv::Mat SwapBackgroundMatting( static cv::Mat
const cv::Mat& im, const cv::Mat& background, const MattingResult& result, SwapBackgroundMatting(const cv::Mat& im, const cv::Mat& background,
bool remove_small_connected_area = false); const MattingResult& result,
bool remove_small_connected_area = false);
static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im, static cv::Mat SwapBackgroundSegmentation(const cv::Mat& im,
const cv::Mat& background, const cv::Mat& background,
int background_label, int background_label,
@@ -90,9 +91,11 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im,
* \param[in] font_size font size * \param[in] font_size font size
* \return cv::Mat type stores the visualized results * \return cv::Mat type stores the visualized results
*/ */
FASTDEPLOY_DECL cv::Mat VisClassification( FASTDEPLOY_DECL cv::Mat VisClassification(const cv::Mat& im,
const cv::Mat& im, const ClassifyResult& result, int top_k = 5, const ClassifyResult& result,
float score_threshold = 0.0f, float font_size = 0.5f); int top_k = 5,
float score_threshold = 0.0f,
float font_size = 0.5f);
/** \brief Show the visualized results with custom labels for classification models /** \brief Show the visualized results with custom labels for classification models
* *
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -103,10 +106,10 @@ FASTDEPLOY_DECL cv::Mat VisClassification(
* \param[in] font_size font size * \param[in] font_size font size
* \return cv::Mat type stores the visualized results * \return cv::Mat type stores the visualized results
*/ */
FASTDEPLOY_DECL cv::Mat VisClassification( FASTDEPLOY_DECL cv::Mat
const cv::Mat& im, const ClassifyResult& result, VisClassification(const cv::Mat& im, const ClassifyResult& result,
const std::vector<std::string>& labels, int top_k = 5, const std::vector<std::string>& labels, int top_k = 5,
float score_threshold = 0.0f, float font_size = 0.5f); float score_threshold = 0.0f, float font_size = 0.5f);
/** \brief Show the visualized results for face detection models /** \brief Show the visualized results for face detection models
* *
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -159,7 +162,8 @@ FASTDEPLOY_DECL cv::Mat VisMatting(const cv::Mat& im,
* \param[in] result the result produced by model * \param[in] result the result produced by model
* \return cv::Mat type stores the visualized results * \return cv::Mat type stores the visualized results
*/ */
FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result); FASTDEPLOY_DECL cv::Mat VisOcr(const cv::Mat& im, const OCRResult& ocr_result,
const float score_threshold = 0);
FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results, FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
float score_threshold = 0.0f, float score_threshold = 0.0f,
@@ -172,10 +176,10 @@ FASTDEPLOY_DECL cv::Mat VisMOT(const cv::Mat& img, const MOTResult& results,
* \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas * \param[in] remove_small_connected_area if remove_small_connected_area==true, the visualized result will not include the small connected areas
* \return cv::Mat type stores the visualized results * \return cv::Mat type stores the visualized results
*/ */
FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im, FASTDEPLOY_DECL cv::Mat
const cv::Mat& background, SwapBackground(const cv::Mat& im, const cv::Mat& background,
const MattingResult& result, const MattingResult& result,
bool remove_small_connected_area = false); bool remove_small_connected_area = false);
/** \brief Swap the image background with SegmentationResult /** \brief Swap the image background with SegmentationResult
* *
* \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format
@@ -196,12 +200,11 @@ FASTDEPLOY_DECL cv::Mat SwapBackground(const cv::Mat& im,
* \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold * \param[in] conf_threshold threshold for result scores, the result will not be shown if the score is less than conf_threshold
* \return cv::Mat type stores the visualized results * \return cv::Mat type stores the visualized results
*/ */
FASTDEPLOY_DECL cv::Mat VisKeypointDetection(const cv::Mat& im, FASTDEPLOY_DECL cv::Mat
const KeyPointDetectionResult& results, VisKeypointDetection(const cv::Mat& im, const KeyPointDetectionResult& results,
float conf_threshold = 0.5f); float conf_threshold = 0.5f);
FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im, FASTDEPLOY_DECL cv::Mat VisHeadPose(const cv::Mat& im,
const HeadPoseResult& result, const HeadPoseResult& result, int size = 50,
int size = 50,
int line_size = 1); int line_size = 1);
} // namespace vision } // namespace vision

View File

@@ -65,6 +65,29 @@ class DBDetectorPreprocessor:
""" """
self._preprocessor.set_normalize(mean, std, is_scale) self._preprocessor.set_normalize(mean, std, is_scale)
@property
def static_shape_infer(self):
return self._preprocessor.static_shape_infer
@static_shape_infer.setter
def static_shape_infer(self, value):
assert isinstance(
value,
bool), "The value to set `static_shape_infer` must be type of bool."
self._preprocessor.static_shape_infer = value
def disable_normalize(self):
"""
This function will disable normalize in preprocessing step.
"""
self._preprocessor.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._preprocessor.disable_permute()
class DBDetectorPostprocessor: class DBDetectorPostprocessor:
def __init__(self): def __init__(self):
@@ -358,6 +381,18 @@ class ClassifierPreprocessor:
list), "The value to set `cls_image_shape` must be type of list." list), "The value to set `cls_image_shape` must be type of list."
self._preprocessor.cls_image_shape = value self._preprocessor.cls_image_shape = value
def disable_normalize(self):
"""
This function will disable normalize in preprocessing step.
"""
self._preprocessor.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._preprocessor.disable_permute()
class ClassifierPostprocessor: class ClassifierPostprocessor:
def __init__(self): def __init__(self):
@@ -581,6 +616,18 @@ class RecognizerPreprocessor:
list), "The value to set `rec_image_shape` must be type of list." list), "The value to set `rec_image_shape` must be type of list."
self._preprocessor.rec_image_shape = value self._preprocessor.rec_image_shape = value
def disable_normalize(self):
"""
This function will disable normalize in preprocessing step.
"""
self._preprocessor.disable_normalize()
def disable_permute(self):
"""
This function will disable hwc2chw in preprocessing step.
"""
self._preprocessor.disable_permute()
class RecognizerPostprocessor: class RecognizerPostprocessor:
def __init__(self, label_path): def __init__(self, label_path):

View File

@@ -0,0 +1,15 @@
mean:
-
- 127.5
- 127.5
- 127.5
std:
-
- 127.5
- 127.5
- 127.5
model_path: ./ch_ppocr_mobile_v2.0_cls_infer/ch_ppocr_mobile_v2.0_cls_infer.onnx
outputs_nodes:
do_quantization: False
dataset:
output_folder: "./ch_ppocr_mobile_v2.0_cls_infer"

View File

@@ -0,0 +1,15 @@
mean:
-
- 123.675
- 116.28
- 103.53
std:
-
- 58.395
- 57.12
- 57.375
model_path: ./ch_PP-OCRv3_det_infer/ch_PP-OCRv3_det_infer.onnx
outputs_nodes:
do_quantization: False
dataset:
output_folder: "./ch_PP-OCRv3_det_infer"

View File

@@ -0,0 +1,15 @@
mean:
-
- 127.5
- 127.5
- 127.5
std:
-
- 127.5
- 127.5
- 127.5
model_path: ./ch_PP-OCRv3_rec_infer/ch_PP-OCRv3_rec_infer.onnx
outputs_nodes:
do_quantization: False
dataset:
output_folder: "./ch_PP-OCRv3_rec_infer"

View File

@@ -65,7 +65,10 @@ if __name__ == "__main__":
if not os.path.exists(yaml_config["output_folder"]): if not os.path.exists(yaml_config["output_folder"]):
os.mkdir(yaml_config["output_folder"]) os.mkdir(yaml_config["output_folder"])
model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0] name_list = os.path.basename(yaml_config["model_path"]).split(".")
model_base_name = ""
for name in name_list[0:-1]:
model_base_name += name
model_device_name = config.target_platform.lower() model_device_name = config.target_platform.lower()
if yaml_config["do_quantization"]: if yaml_config["do_quantization"]:
model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn" model_save_name = model_base_name + "_" + model_device_name + "_quantized" + ".rknn"