[Hackthon_4th 242] Support en_ppstructure_mobile_v2.0_SLANet (#1816)

* first draft

* update api name

* fix bug

* fix bug and

* fix bug in c api

* fix bug in c_api

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
thunder95
2023-04-27 10:45:14 +08:00
committed by GitHub
parent ef576ce875
commit 2c5fd91a7f
35 changed files with 2505 additions and 39 deletions

View File

@@ -22,6 +22,7 @@ add_executable(benchmark_ppmatting ${PROJECT_SOURCE_DIR}/benchmark_ppmatting.cc)
add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
add_executable(benchmark_structurev2_table ${PROJECT_SOURCE_DIR}/benchmark_structurev2_table.cc)
add_executable(benchmark_ppyoloe_r ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r.cc)
add_executable(benchmark_ppyoloe_r_sophgo ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r_sophgo.cc)
add_executable(benchmark_ppyolo ${PROJECT_SOURCE_DIR}/benchmark_ppyolo.cc)
@@ -55,6 +56,7 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_structurev2_table ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags pthread)
@@ -85,6 +87,7 @@ else()
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_structurev2_table ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags)

View File

@@ -44,6 +44,7 @@ fi
./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH
./benchmark_ppocr_det --model ch_PP-OCRv2_det_infer --image 12.jpg --config_path $CONFIG_PATH
./benchmark_ppocr_rec --model ch_PP-OCRv2_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH
./benchmark_structurev2_table --model en_ppstructure_mobile_v2.0_SLANet_infer --image table.jpg --table_char_dict_path table_structure_dict.txt --config_path $CONFIG_PATH
# PaddleDetection
./benchmark_ppyolov5 --model yolov5_s_300e_coco --image 000000014439.jpg --config_path $CONFIG_PATH

View File

@@ -0,0 +1,161 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "option.h"
namespace vision = fastdeploy::vision;
namespace benchmark = fastdeploy::benchmark;
DEFINE_string(table_char_dict_path, "",
"Path of table character dict of PPOCR.");
DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,2304",
"Set min/opt/max shape for trt/paddle_trt backend."
"eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304");
int main(int argc, char *argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
// Initialization
auto option = fastdeploy::RuntimeOption();
if (!CreateRuntimeOption(&option, argc, argv, true)) {
return -1;
}
auto im = cv::imread(FLAGS_image);
std::unordered_map<std::string, std::string> config_info;
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
&config_info);
std::string model_name, params_name, config_name;
auto model_format = fastdeploy::ModelFormat::PADDLE;
if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
&model_format, config_info, false)) {
return -1;
}
auto model_file = FLAGS_model + sep + model_name;
auto params_file = FLAGS_model + sep + params_name;
if (config_info["backend"] == "paddle_trt") {
option.paddle_infer_option.collect_trt_shape = true;
}
if (config_info["backend"] == "paddle_trt" ||
config_info["backend"] == "trt") {
std::vector<std::vector<int32_t>> trt_shapes =
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
trt_shapes[2]);
}
auto model_ppocr_table = vision::ocr::StructureV2Table(
model_file, params_file, FLAGS_table_char_dict_path, option,
model_format);
fastdeploy::vision::OCRResult result;
if (config_info["precision_compare"] == "true") {
std::string expect_structure_html =
"<html><body><table><thead><tr><td></td><td></td><td></td><td></"
"td><td></td></tr></thead><tbody><tr><td></td><td></td><td></td><td></"
"td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></"
"tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td></"
"td><td></td><td></td><td></td><td></td></tr><tr><td></td><td></"
"td><td></td><td></td><td></td></tr><tr><td></td><td></td><td></"
"td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></"
"td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></"
"tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td></"
"td><td></td><td></td><td></td><td></td></tr><tr><td></td><td></"
"td><td></td><td></td><td></td></tr><tr><td></td><td></td><td></"
"td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></"
"td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></"
"tr><tr><td></td><td></td><td></td><td></td><td></td></tr></tbody></"
"table></body></html>";
std::vector<int> expect_box_coord{
41, 4, 97, 18, 161, 4, 173, 18, 216, 4, 225, 17, 272, 4,
283, 17, 321, 4, 348, 18, 33, 20, 106, 38, 150, 22, 180, 38,
202, 22, 235, 38, 262, 21, 293, 38, 326, 23, 343, 37, 27, 38,
109, 56, 150, 39, 179, 56, 204, 39, 236, 56, 263, 39, 292, 55,
329, 40, 343, 54, 22, 57, 118, 74, 152, 58, 176, 74, 204, 58,
236, 75, 262, 58, 291, 74, 326, 58, 344, 74, 27, 75, 119, 92,
150, 75, 177, 92, 204, 75, 235, 92, 260, 75, 292, 92, 326, 75,
346, 92, 44, 92, 102, 110, 150, 92, 177, 110, 205, 92, 236, 110,
262, 92, 290, 110, 329, 93, 339, 110, 41, 109, 102, 128, 151, 110,
175, 128, 205, 110, 236, 128, 262, 110, 291, 127, 329, 110, 338, 127,
42, 128, 102, 146, 149, 128, 177, 146, 205, 128, 237, 146, 262, 128,
291, 146, 329, 128, 339, 145, 31, 145, 110, 163, 150, 145, 178, 163,
206, 145, 237, 164, 262, 145, 292, 163, 324, 145, 342, 162, 40, 162,
108, 180, 154, 162, 175, 180, 209, 162, 231, 180, 266, 162, 286, 180,
325, 162, 341, 179, 38, 180, 105, 197, 152, 180, 177, 197, 207, 180,
236, 197, 262, 180, 291, 197, 329, 181, 339, 196, 42, 196, 102, 214,
151, 197, 179, 214, 205, 197, 236, 214, 263, 197, 291, 214, 320, 197,
349, 214, 46, 215, 100, 233, 149, 216, 179, 233, 204, 216, 238, 233,
262, 216, 291, 233, 321, 216, 345, 232, 42, 233, 104, 251, 147, 234,
179, 251, 203, 233, 237, 251, 260, 233, 294, 251, 326, 234, 341, 250,
19, 251, 120, 269, 148, 253, 180, 270, 202, 252, 240, 270, 259, 252,
294, 270, 324, 252, 347, 268, 16, 270, 123, 286, 146, 270, 182, 287,
200, 270, 238, 287, 256, 270, 294, 286, 319, 270, 353, 286};
// Run once at least
if (!model_ppocr_table.Predict(im, &result)) {
std::cerr << "Failed to predict." << std::endl;
return -1;
}
// 1. Test result diff
std::cout << "=============== Test Table Result diff =================\n";
// Calculate diff between two results.
std::string result_table_structure;
for (auto &structure : result.table_structure) {
result_table_structure += structure;
}
if (expect_structure_html == result_table_structure) {
std::cout << "PPOCR Table structure has no diff" << std::endl;
} else {
std::cout << "PPOCR Table structure has diff" << std::endl;
std::cout << "expected: " << expect_structure_html << std::endl;
std::cout << "result: " << result_table_structure << std::endl;
}
std::vector<int> table_box_coord;
for (auto &box : result.table_boxes) {
// x1 y1 x2 y1 x2 y2 x1 y2 => x1 y1 x2 y2
table_box_coord.push_back(box[0]);
table_box_coord.push_back(box[1]);
table_box_coord.push_back(box[2]);
table_box_coord.push_back(box[5]);
}
if (expect_box_coord.size() == table_box_coord.size()) {
std::cout << "table boxes num matched with expected: "
<< table_box_coord.size() << std::endl;
int max_diff = 0;
int total_diff = 0;
for (int i = 0; i < table_box_coord.size(); i++) {
int diff = std::abs(table_box_coord[i] - expect_box_coord[i]);
if (diff > max_diff) {
max_diff = diff;
}
total_diff += diff;
}
std::cout << "box coords, max_diff: " << max_diff << ", "
<< ", total diff: " << total_diff << ", average diff: "
<< total_diff / float(table_box_coord.size()) << std::endl;
} else {
std::cout << "boxes num has diff, expect box num: "
<< expect_box_coord.size() / 4
<< ", result box num:" << table_box_coord.size() / 4
<< std::endl;
}
}
BENCHMARK_MODEL(model_ppocr_table, model_ppocr_table.Predict(im, &result));
#endif
return 0;
}

View File

@@ -212,6 +212,7 @@ download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP
download_common_model_xvf https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar ch_ppocr_mobile_v2.0_cls_infer.tar
download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar ch_PP-OCRv2_det_infer.tar
download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ch_PP-OCRv2_rec_infer.tar
download_common_model_xvf https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar en_ppstructure_mobile_v2.0_SLANet_infer.tar
# download images
download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/rec_img.jpg rec_img.jpg

View File

@@ -18,5 +18,5 @@
#endif
#ifndef ENABLE_TEXT
#define ENABLE_TEXT
/* #undef ENABLE_TEXT */
#endif

View File

@@ -60,6 +60,11 @@ typedef struct FD_C_OneDimArrayCstr {
FD_C_Cstr* data;
} FD_C_OneDimArrayCstr; // std::vector<std::string>
typedef struct FD_C_TwoDimArrayCstr {
size_t size;
FD_C_OneDimArrayCstr* data;
} FD_C_TwoDimArrayCstr; // std::vector<std::vector<std::string>>
typedef struct FD_C_TwoDimArraySize {
size_t size;
FD_C_OneDimArraySize* data;
@@ -134,6 +139,8 @@ DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayFloat);
DECLARE_DESTROY_FD_TYPE_FUNCTION(Cstr);
// FD_C_OneDimArrayCstr
DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayCstr);
// FD_C_TwoDimArrayCstr
DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArrayCstr);
// FD_C_TwoDimArraySize
DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArraySize);
// FD_C_TwoDimArrayInt8

View File

@@ -318,6 +318,124 @@ FD_C_Bool FD_C_DBDetectorWrapperBatchPredict(
return successful;
}
// StructureV2Table
FD_C_StructureV2TableWrapper* FD_C_CreateStructureV2TableWrapper(
const char* model_file, const char* params_file,
const char* table_char_dict_path,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format) {
auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
fd_c_runtime_option_wrapper);
FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper =
new FD_C_StructureV2TableWrapper();
fd_c_structurev2table_wrapper->table_model =
std::unique_ptr<fastdeploy::vision::ocr::StructureV2Table>(
new fastdeploy::vision::ocr::StructureV2Table(
std::string(model_file), std::string(params_file),
std::string(table_char_dict_path), *runtime_option,
static_cast<fastdeploy::ModelFormat>(model_format)));
return fd_c_structurev2table_wrapper;
}
OCR_DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(
StructureV2Table, fd_c_structurev2table_wrapper)
FD_C_Bool FD_C_StructureV2TableWrapperPredict(
FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_Mat img,
FD_C_TwoDimArrayInt32* boxes_result,
FD_C_OneDimArrayCstr* structure_result) {
cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
std::vector<std::array<int, 8>> boxes_result_out;
std::vector<std::string> structures_result_out;
auto& model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper,
fd_c_structurev2table_wrapper);
bool successful =
model->Predict(*im, &boxes_result_out, &structures_result_out);
if (successful) {
// copy boxes
const int boxes_coordinate_dim = 8;
boxes_result->size = boxes_result_out.size();
boxes_result->data = new FD_C_OneDimArrayInt32[boxes_result->size];
for (size_t i = 0; i < boxes_result_out.size(); i++) {
boxes_result->data[i].size = boxes_coordinate_dim;
boxes_result->data[i].data = new int[boxes_coordinate_dim];
for (size_t j = 0; j < boxes_coordinate_dim; j++) {
boxes_result->data[i].data[j] = boxes_result_out[i][j];
}
}
// copy structures
structure_result->size = structures_result_out.size();
structure_result->data = new FD_C_Cstr[structure_result->size];
for (int i = 0; i < structures_result_out.size(); i++) {
structure_result->data[i].size = structures_result_out[i].length();
structure_result->data[i].data =
new char[structures_result_out[i].length() + 1];
strncpy(structure_result->data[i].data, structures_result_out[i].c_str(),
structures_result_out[i].length());
// Ensure the copied string is null-terminated.
structure_result->data[i].data[structures_result_out[i].length()] = '\0';
}
}
return successful;
}
OCR_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(StructureV2Table,
fd_c_structurev2table_wrapper)
FD_C_Bool FD_C_StructureV2TableWrapperBatchPredict(
FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper,
FD_C_OneDimMat imgs, FD_C_ThreeDimArrayInt32* det_results,
FD_C_TwoDimArrayCstr* structure_results) {
std::vector<cv::Mat> imgs_vec;
std::vector<std::vector<std::array<int, 8>>> det_results_out;
std::vector<std::vector<std::string>> structure_results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
}
auto& model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper,
fd_c_structurev2table_wrapper);
bool successful =
model->BatchPredict(imgs_vec, &det_results_out, &structure_results_out);
if (successful) {
// copy results back to FD_C_ThreeDimArrayInt32
det_results->size = det_results_out.size();
det_results->data = new FD_C_TwoDimArrayInt32[det_results->size];
for (int batch_indx = 0; batch_indx < det_results->size; batch_indx++) {
const int boxes_coordinate_dim = 8;
det_results->data[batch_indx].size = det_results_out[batch_indx].size();
det_results->data[batch_indx].data =
new FD_C_OneDimArrayInt32[det_results->data[batch_indx].size];
for (size_t i = 0; i < det_results_out[batch_indx].size(); i++) {
det_results->data[batch_indx].data[i].size = boxes_coordinate_dim;
det_results->data[batch_indx].data[i].data =
new int[boxes_coordinate_dim];
for (size_t j = 0; j < boxes_coordinate_dim; j++) {
det_results->data[batch_indx].data[i].data[j] =
det_results_out[batch_indx][i][j];
}
}
}
// copy structures
structure_results->size = structure_results_out.size();
structure_results->data = new FD_C_OneDimArrayCstr[structure_results->size];
for (int batch_indx = 0; batch_indx < structure_results->size;
batch_indx++) {
structure_results->data[batch_indx].size =
structure_results_out[batch_indx].size();
structure_results->data[batch_indx].data =
new FD_C_Cstr[structure_results->data[batch_indx].size];
for (int i = 0; i < structure_results_out[batch_indx].size(); i++) {
structure_results->data[batch_indx].data[i].size =
structure_results_out[batch_indx][i].length();
structure_results->data[batch_indx].data[i].data =
new char[structure_results_out[batch_indx][i].length() + 1];
strncpy(structure_results->data[batch_indx].data[i].data,
structure_results_out[batch_indx][i].c_str(),
structure_results_out[batch_indx][i].length());
// Ensure the copied string is null-terminated.
structure_results->data[batch_indx].data[i]
.data[structure_results_out[batch_indx][i].length()] = '\0';
}
}
}
return successful;
}
// PPOCRv2
FD_C_PPOCRv2Wrapper* FD_C_CreatePPOCRv2Wrapper(
@@ -466,6 +584,82 @@ FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict(
return successful;
}
// PPStructureV2Table
FD_C_PPStructureV2TableWrapper* FD_C_CreatePPStructureV2TableWrapper(
FD_C_DBDetectorWrapper* fd_c_det_model_wrapper,
FD_C_RecognizerWrapper* fd_c_rec_model_wrapper,
FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper) {
FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper =
new FD_C_PPStructureV2TableWrapper();
auto& det_model =
CHECK_AND_CONVERT_FD_TYPE(DBDetectorWrapper, fd_c_det_model_wrapper);
auto& rec_model =
CHECK_AND_CONVERT_FD_TYPE(RecognizerWrapper, fd_c_rec_model_wrapper);
auto& table_model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper,
fd_c_structurev2table_wrapper);
fd_c_ppstructurev2table_wrapper->ppstructurev2table_model =
std::unique_ptr<fastdeploy::pipeline::PPStructureV2Table>(
new fastdeploy::pipeline::PPStructureV2Table(
det_model.get(), rec_model.get(), table_model.get()));
return fd_c_ppstructurev2table_wrapper;
}
PIPELINE_DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(
PPStructureV2Table, fd_c_ppstructurev2table_wrapper)
FD_C_Bool FD_C_PPStructureV2TableWrapperPredict(
FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper,
FD_C_Mat img, FD_C_OCRResult* fd_c_ocr_result) {
cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
auto& model = CHECK_AND_CONVERT_FD_TYPE(PPStructureV2TableWrapper,
fd_c_ppstructurev2table_wrapper);
FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper =
FD_C_CreateOCRResultWrapper();
auto& ocr_result =
CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, fd_c_ocr_result_wrapper);
bool successful = model->Predict(im, ocr_result.get());
if (successful) {
FD_C_OCRResultWrapperToCResult(fd_c_ocr_result_wrapper, fd_c_ocr_result);
}
FD_C_DestroyOCRResultWrapper(fd_c_ocr_result_wrapper);
return successful;
}
PIPELINE_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(
PPStructureV2Table, fd_c_ppstructurev2table_wrapper)
FD_C_Bool FD_C_PPStructureV2TableWrapperBatchPredict(
FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper,
FD_C_OneDimMat imgs, FD_C_OneDimOCRResult* results) {
std::vector<cv::Mat> imgs_vec;
std::vector<FD_C_OCRResultWrapper*> results_wrapper_out;
std::vector<fastdeploy::vision::OCRResult> results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
FD_C_OCRResultWrapper* fd_ocr_result_wrapper =
FD_C_CreateOCRResultWrapper();
results_wrapper_out.push_back(fd_ocr_result_wrapper);
}
auto& model = CHECK_AND_CONVERT_FD_TYPE(PPStructureV2TableWrapper,
fd_c_ppstructurev2table_wrapper);
bool successful = model->BatchPredict(imgs_vec, &results_out);
if (successful) {
// copy results back to FD_C_OneDimOCRResult
results->size = results_out.size();
results->data = new FD_C_OCRResult[results->size];
for (int i = 0; i < results_out.size(); i++) {
(*CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, results_wrapper_out[i])) =
std::move(results_out[i]);
FD_C_OCRResultWrapperToCResult(results_wrapper_out[i], &results->data[i]);
}
}
for (int i = 0; i < results_out.size(); i++) {
FD_C_DestroyOCRResultWrapper(results_wrapper_out[i]);
}
return successful;
}
#ifdef __cplusplus
}
#endif

View File

@@ -225,6 +225,68 @@ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_DBDetectorWrapperBatchPredict(
FD_C_ThreeDimArrayInt32* det_results);
// StructureV2Table
typedef struct FD_C_StructureV2TableWrapper FD_C_StructureV2TableWrapper;
/** \brief Create a new FD_C_StructureV2TableWrapper object
*
* \param[in] model_file Path of model file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdmodel.
* \param[in] params_file Path of parameter file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams, if the model format is ONNX, this parameter will be ignored.
* \param[in] table_char_dict_path Path of the table structure dict file, e.g ./table_structure_dict.txt.
* \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
* \param[in] model_format Model format of the loaded model, default is Paddle format.
*
* \return Return a pointer to FD_C_StructureV2TableWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_StructureV2TableWrapper*
FD_C_CreateStructureV2TableWrapper(
const char* model_file, const char* params_file, const char* table_char_dict_path,
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
const FD_C_ModelFormat model_format);
/** \brief Destroy a FD_C_StructureV2TableWrapper object
*
* \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
*/
OCR_DECLARE_DESTROY_WRAPPER_FUNCTION(StructureV2Table, fd_c_structurev2table_wrapper);
/** \brief Predict the input image and get OCR table model result.
*
* \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] boxes_result The detected table cell boxes will be written to this structure.
* \param[in] structure_result The predicted table structure tokens will be written to this structure.
* \return true if the prediction is successful, otherwise false.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_StructureV2TableWrapperPredict(
__fd_keep FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_Mat img,
FD_C_TwoDimArrayInt32* boxes_result, FD_C_OneDimArrayCstr* structure_result);
/** \brief Check if the model is initialized successfully
*
* \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
*
* \return Return a bool of value true if initialized successfully
*/
OCR_DECLARE_INITIALIZED_FUNCTION(StructureV2Table, fd_c_structurev2table_wrapper);
/** \brief BatchPredict the input image and get OCR table model result.
*
* \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
* \param[in] imgs The list of input image data, each element comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] det_results The detected table cell boxes of each image will be written to this structure.
* \param[in] structure_results The predicted table structure tokens of each image will be written to this structure.
*
* \return true if the prediction is successful, otherwise false.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_StructureV2TableWrapperBatchPredict(
__fd_keep FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_OneDimMat imgs,
FD_C_ThreeDimArrayInt32* det_results, FD_C_TwoDimArrayCstr* structure_results);
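As an illustrative aside (not part of this diff): a minimal sketch of driving the standalone table API declared above from C++. The header path, FD_C_CreateRuntimeOptionWrapper, FD_C_Imread, FD_C_DestroyMat, FD_C_DestroyRuntimeOptionWrapper and the Paddle-format enum value are assumptions based on FastDeploy's existing C API naming; FD_C_DestroyStructureV2TableWrapper follows the wrapper destroy-naming convention generated by the macros above.
```c++
// Sketch only; names marked "assumed" are not shown in this diff.
#include <cstdio>
#include "fastdeploy_capi/vision.h"  // assumed C API umbrella header

int main() {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();  // assumed
  FD_C_StructureV2TableWrapper* table = FD_C_CreateStructureV2TableWrapper(
      "en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdmodel",
      "en_ppstructure_mobile_v2.0_SLANet_infer/inference.pdiparams",
      "table_structure_dict.txt", option, PADDLE /* assumed enum value */);

  FD_C_Mat img = FD_C_Imread("table.jpg");  // assumed image-loading helper
  FD_C_TwoDimArrayInt32 boxes;     // 8 ints per cell box (4 corner points)
  FD_C_OneDimArrayCstr structure;  // table structure tokens, e.g. "<td>"
  if (FD_C_StructureV2TableWrapperPredict(table, img, &boxes, &structure)) {
    for (size_t i = 0; i < structure.size; ++i) {
      printf("%s", structure.data[i].data);  // concatenated tokens give the table skeleton
    }
    printf("\n");
    // The FD_C_Destroy* helpers for these array types can release boxes/structure.
  }
  FD_C_DestroyMat(img);                        // assumed
  FD_C_DestroyStructureV2TableWrapper(table);  // generated destroy function
  FD_C_DestroyRuntimeOptionWrapper(option);    // assumed
  return 0;
}
```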
// PPOCRv2
@@ -343,6 +405,63 @@ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict(
FD_C_OneDimOCRResult* batch_result);
// PPStructureV2Table
typedef struct FD_C_PPStructureV2TableWrapper FD_C_PPStructureV2TableWrapper;
/** \brief Create a PP-StructureV2 table pipeline from a detection model, a recognition model and a table recognition model.
*
* \param[in] det_model Pointer to a created FD_C_DBDetectorWrapper object, e.g built from ./ch_PP-OCRv3_det_infer
* \param[in] rec_model Pointer to a created FD_C_RecognizerWrapper object, e.g built from ./ch_PP-OCRv3_rec_infer
* \param[in] table_model Pointer to a created FD_C_StructureV2TableWrapper object, e.g built from ./en_ppstructure_mobile_v2.0_SLANet_infer
*
* \return Return a pointer to FD_C_PPStructureV2TableWrapper object
*/
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PPStructureV2TableWrapper*
FD_C_CreatePPStructureV2TableWrapper(
FD_C_DBDetectorWrapper* det_model,
FD_C_RecognizerWrapper* rec_model,
FD_C_StructureV2TableWrapper* table_model);
/** \brief Destroy a FD_C_PPStructureV2TableWrapper object
*
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
*/
OCR_DECLARE_DESTROY_WRAPPER_FUNCTION(PPStructureV2Table, fd_c_ppstructurev2table_wrapper);
/** \brief Predict the input image and get OCR result.
*
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] result The output OCR result will be written to this structure.
* \return true if the prediction succeeded, otherwise false.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPStructureV2TableWrapperPredict(
__fd_keep FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, FD_C_Mat img,
FD_C_OCRResult* result);
/** \brief Check if the model is initialized successfully
*
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
*
* \return Return a bool of value true if initialized successfully
*/
OCR_DECLARE_INITIALIZED_FUNCTION(PPStructureV2Table, fd_c_ppstructurev2table_wrapper);
/** \brief BatchPredict the input image and get OCR result.
*
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
* \param[in] imgs The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] batch_result The output list of OCR results will be written to this structure.
* \return true if the prediction succeeded, otherwise false.
*/
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPStructureV2TableWrapperBatchPredict(
__fd_keep FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, FD_C_OneDimMat imgs,
FD_C_OneDimOCRResult* batch_result);
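A companion sketch for the pipeline declared above, continuing the previous example (`det`, `rec`, `table` and `img` are assumed to be already-created wrappers and mat); it also illustrates the table fields this PR adds to FD_C_OCRResult.
```c++
// Sketch only, continuing the previous example.
FD_C_PPStructureV2TableWrapper* pipeline =
    FD_C_CreatePPStructureV2TableWrapper(det, rec, table);

FD_C_OCRResult result;
if (FD_C_PPStructureV2TableWrapperPredict(pipeline, img, &result)) {
  // Fields added to FD_C_OCRResult by this PR:
  //   table_boxes     (FD_C_TwoDimArrayInt32) - 8 ints per cell: 4 corner points
  //   table_structure (FD_C_OneDimArrayCstr)  - structure tokens
  //   table_html      (FD_C_Cstr)             - assembled HTML, when populated
  for (size_t i = 0; i < result.table_boxes.size; ++i) {
    FD_C_OneDimArrayInt32 box = result.table_boxes.data[i];
    printf("cell %zu: (%d,%d) .. (%d,%d)\n", i,
           box.data[0], box.data[1], box.data[4], box.data[5]);
  }
  if (result.table_html.size > 0) {
    printf("%s\n", result.table_html.data);
  }
}
FD_C_DestroyPPStructureV2TableWrapper(pipeline);  // generated destroy function
```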
#ifdef __cplusplus
} // extern "C"
#endif

View File

@@ -70,6 +70,9 @@ typedef struct FD_C_OCRResult {
FD_C_OneDimArrayFloat rec_scores;
FD_C_OneDimArrayFloat cls_scores;
FD_C_OneDimArrayInt32 cls_labels;
FD_C_TwoDimArrayInt32 table_boxes;
FD_C_OneDimArrayCstr table_structure;
FD_C_Cstr table_html;
FD_C_ResultType type;
} FD_C_OCRResult;

View File

@@ -166,6 +166,10 @@ DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
Classifier, fd_classifier_wrapper, classifier_model);
// Table
DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
StructureV2Table, fd_structurev2_table_wrapper, table_model);
// PPOCRv2
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
PPOCRv2, fd_ppocrv2_wrapper, ppocrv2_model);
@@ -174,6 +178,11 @@ DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
PPOCRv3, fd_ppocrv3_wrapper, ppocrv3_model);
// PPStructureV2Table
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
PPStructureV2Table, fd_ppstructurev2_table_wrapper,
ppstructurev2table_model);
// Segmentation models
// PaddleSegModel

View File

@@ -29,8 +29,10 @@
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
#include "fastdeploy/vision/segmentation/ppseg/model.h"
#define DEFINE_RESULT_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
@@ -176,12 +178,18 @@ DEFINE_OCR_MODEL_WRAPPER_STRUCT(DBDetector, dbdetector_model);
// Classifier
DEFINE_OCR_MODEL_WRAPPER_STRUCT(Classifier, classifier_model);
// StructureV2Table
DEFINE_OCR_MODEL_WRAPPER_STRUCT(StructureV2Table, table_model);
// PPOCRv2
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model);
// PPOCRv3
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model);
// PPStructureV2Table
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPStructureV2Table, ppstructurev2table_model);
// Segmentation models
// PaddleSegModel
@@ -383,12 +391,18 @@ DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(DBDetector, fd_dbdetector_wrappe
// Classifier
DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(Classifier, fd_classifier_wrapper);
// Table
DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(StructureV2Table, fd_structurev2_table_wrapper);
// PPOCRv2
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper);
// PPOCRv3
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper);
// PPStructureV2Table
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPStructureV2Table, fd_ppstructurev2_table_wrapper);
// Segmentation models
// PaddleSegModel

View File

@@ -14,6 +14,11 @@ add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
# PPStructure-V2-Table
add_executable(infer_ppstructurev2_table ${PROJECT_SOURCE_DIR}/infer_ppstructurev2_table.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_ppstructurev2_table ${FASTDEPLOY_LIBS})
# Only Det
add_executable(infer_det ${PROJECT_SOURCE_DIR}/infer_det.cc)
# Add the FastDeploy library dependency
@@ -28,3 +33,8 @@ target_link_libraries(infer_cls ${FASTDEPLOY_LIBS})
add_executable(infer_rec ${PROJECT_SOURCE_DIR}/infer_rec.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_rec ${FASTDEPLOY_LIBS})
# Only Table
add_executable(infer_structurev2_table ${PROJECT_SOURCE_DIR}/infer_structurev2_table.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_structurev2_table ${FASTDEPLOY_LIBS})

View File

@@ -43,10 +43,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
# Download the PP-OCRv3 text recognition model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar -xvf ch_PP-OCRv3_rec_infer.tar
# Download the PP-StructureV2 table recognition model
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
# Download the test images and dictionary files
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt
# Run the deployment example
# Inference on CPU with Paddle Inference
@@ -77,6 +82,9 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_
# On CPU, deploy the text recognition model alone
./infer_rec ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
# On CPU, deploy the table recognition model alone
./infer_structurev2_table ./ch_ppstructure_mobile_v2.0_SLANet_infer ./table.jpg ./table_structure_dict_ch.txt 0
```
After running, the visualized result is shown in the figure below

View File

@@ -0,0 +1,177 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
void InitAndInfer(const std::string &det_model_dir,
const std::string &rec_model_dir,
const std::string &table_model_dir,
const std::string &rec_label_file,
const std::string &table_char_dict_path,
const std::string &image_file,
const fastdeploy::RuntimeOption &option) {
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
auto table_model_file = table_model_dir + sep + "inference.pdmodel";
auto table_params_file = table_model_dir + sep + "inference.pdiparams";
auto det_option = option;
auto rec_option = option;
auto table_option = option;
// The rec model can now run inference on a batch of images.
// Users can set the inference batch size after creating the
// PP-StructureV2 table pipeline.
int rec_batch_size = 1;
// If the TRT backend is used, the dynamic shapes are set as follows.
// We recommend that users set the width and height of the detection model
// input to a multiple of 32, and set the TRT input shapes as follows.
det_option.SetTrtInputShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
{1, 3, 960, 960});
rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
{rec_batch_size, 3, 48, 2304});
table_option.SetTrtInputShape("x", {1, 3, 488, 488}, {1, 3, 488, 488},
{1, 3, 488, 488});
// Users can save the TRT cache file to disk as follows.
det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
rec_option.SetTrtCacheFile(rec_model_dir + sep + "rec_trt_cache.trt");
table_option.SetTrtCacheFile(table_model_dir + sep + "table_trt_cache.trt");
auto det_model = fastdeploy::vision::ocr::DBDetector(
det_model_file, det_params_file, det_option);
auto rec_model = fastdeploy::vision::ocr::Recognizer(
rec_model_file, rec_params_file, rec_label_file, rec_option);
auto table_model = fastdeploy::vision::ocr::StructureV2Table(
table_model_file, table_params_file, table_char_dict_path, table_option);
assert(det_model.Initialized());
assert(rec_model.Initialized());
assert(table_model.Initialized());
// Parameter settings for the pre- and post-processing of the Det/Rec models.
// All parameters are set to default values.
det_model.GetPreprocessor().SetMaxSideLen(960);
det_model.GetPostprocessor().SetDetDBThresh(0.3);
det_model.GetPostprocessor().SetDetDBBoxThresh(0.6);
det_model.GetPostprocessor().SetDetDBUnclipRatio(1.5);
det_model.GetPostprocessor().SetDetDBScoreMode("slow");
det_model.GetPostprocessor().SetUseDilation(0);
rec_model.GetPreprocessor().SetStaticShapeInfer(true);
rec_model.GetPreprocessor().SetRecImageShape({3, 48, 320});
// Build the PP-StructureV2 table pipeline from the detection, recognition
// and table recognition models.
auto ppstructurev2_table = fastdeploy::pipeline::PPStructureV2Table(
&det_model, &rec_model, &table_model);
// Set the inference batch size for the rec model; the value can be -1 or any
// positive integer.
// When the batch size is set to -1, the inference batch size of the rec model
// equals the number of boxes detected by the det model.
ppstructurev2_table.SetRecBatchSize(rec_batch_size);
if (!ppstructurev2_table.Initialized()) {
std::cerr << "Failed to initialize PP-OCR-Table." << std::endl;
return;
}
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult result;
if (!ppstructurev2_table.Predict(&im, &result)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << result.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisOcr(im_bak, result);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
int main(int argc, char *argv[]) {
if (argc < 8) {
std::cout << "Usage: infer_ppstructurev2_table path/to/det_model "
"path/to/rec_model "
"path/to/table_model path/to/rec_label_file "
"path/to/table_char_dict_path path/to/image "
"run_option, "
"e.g ./infer_ppstructurev2_table ./ch_PP-OCRv3_det_infer "
"./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
"./ppocr_keys_v1.txt ./12.jpg 0"
<< std::endl;
std::cout << "The data type of run_option is int, e.g. 0: run with paddle "
"inference on cpu;"
<< std::endl;
return -1;
}
fastdeploy::RuntimeOption option;
int flag = std::atoi(argv[7]);
std::cout << "flag: " << flag << std::endl;
if (flag == 0) {
option.UseCpu();
option.UsePaddleBackend(); // Paddle Inference
} else if (flag == 1) {
option.UseCpu();
option.UseOpenVINOBackend(); // OpenVINO
} else if (flag == 2) {
option.UseCpu();
option.UseOrtBackend(); // ONNX Runtime
} else if (flag == 3) {
option.UseCpu();
option.UseLiteBackend(); // Paddle Lite
} else if (flag == 4) {
option.UseGpu();
option.UsePaddleBackend(); // Paddle Inference
} else if (flag == 5) {
option.UseGpu();
option.UsePaddleInferBackend();
option.paddle_infer_option.collect_trt_shape = true;
option.paddle_infer_option.enable_trt = true; // Paddle-TensorRT
} else if (flag == 6) {
option.UseGpu();
option.UseOrtBackend(); // ONNX Runtime
} else if (flag == 7) {
option.UseGpu();
option.UseTrtBackend(); // TensorRT
}
std::string det_model_dir = argv[1];
std::string rec_model_dir = argv[2];
std::string table_model_dir = argv[3];
std::string rec_label_file = argv[4];
std::string table_char_dict_path = argv[5];
std::string test_image = argv[6];
InitAndInfer(det_model_dir, rec_model_dir, table_model_dir, rec_label_file,
table_char_dict_path, test_image, option);
return 0;
}

View File

@@ -0,0 +1,74 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
void InitAndInfer(const std::string &table_model_dir,
const std::string &image_file,
const std::string &table_char_dict_path,
const fastdeploy::RuntimeOption &option) {
auto table_model_file = table_model_dir + sep + "inference.pdmodel";
auto table_params_file = table_model_dir + sep + "inference.pdiparams";
auto table_option = option;
auto table_model = fastdeploy::vision::ocr::StructureV2Table(
table_model_file, table_params_file, table_char_dict_path, table_option);
assert(table_model.Initialized());
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::OCRResult result;
if (!table_model.Predict(im, &result)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << result.Str() << std::endl;
}
int main(int argc, char *argv[]) {
if (argc < 5) {
std::cout << "Usage: infer_demo path/to/table_model path/to/image "
"path/to/table_dict_path"
"run_option, "
"e.g ./infer_structurev2_table ch_ppocr_mobile_v2.0_cls_infer "
"table.jpg table_structure_dict.txt 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu;."
<< std::endl;
return -1;
}
fastdeploy::RuntimeOption option;
int flag = std::atoi(argv[4]);
if (flag == 0) {
option.UseCpu();
} else if (flag == 1) {
option.UseGpu();
}
std::string table_model_dir = argv[1];
std::string test_image = argv[2];
std::string table_char_dict_path = argv[3];
InitAndInfer(table_model_dir, test_image, table_char_dict_path, option);
return 0;
}

View File

@@ -36,10 +36,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
# Download the PP-OCRv3 text recognition model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
tar -xvf ch_PP-OCRv3_rec_infer.tar
# Download the PP-StructureV2 table recognition model
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
# Download the test images and dictionary files
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt
# Run the deployment example
# Inference on CPU with Paddle Inference
@@ -71,6 +76,8 @@ python infer_cls.py --cls_model ch_ppocr_mobile_v2.0_cls_infer --image 12.jpg --
# On CPU, deploy the text recognition model alone
python infer_rec.py --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu
# On CPU, deploy the table recognition model alone
python infer_structurev2_table.py --table_model ./ch_ppstructure_mobile_v2.0_SLANet_infer --table_char_dict_path ./table_structure_dict_ch.txt --image table.jpg --device cpu
```
After running, the visualized result is shown in the figure below

View File

@@ -0,0 +1,175 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import cv2
import os
def parse_arguments():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
"--det_model", required=True, help="Path of Detection model of PPOCR.")
parser.add_argument(
"--rec_model",
required=True,
help="Path of Recognization model of PPOCR.")
parser.add_argument(
"--table_model",
required=True,
help="Path of Table recognition model of PPOCR.")
parser.add_argument(
"--rec_label_file",
required=True,
help="Path of Recognization model of PPOCR.")
parser.add_argument(
"--table_char_dict_path",
type=str,
required=True,
help="tabel recognition dict path.")
parser.add_argument(
"--rec_bs",
type=int,
default=6,
help="Recognition model inference batch size")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--device_id",
type=int,
default=0,
help="Define which GPU card used to run model.")
parser.add_argument(
"--backend",
type=str,
default="default",
help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
)
return parser.parse_args()
def build_option(args):
det_option = fd.RuntimeOption()
rec_option = fd.RuntimeOption()
table_option = fd.RuntimeOption()
if args.device.lower() == "gpu":
det_option.use_gpu(args.device_id)
rec_option.use_gpu(args.device_id)
table_option.use_gpu(args.device_id)
if args.backend.lower() == "trt":
assert args.device.lower(
) == "gpu", "TensorRT backend require inference on device GPU."
det_option.use_trt_backend()
rec_option.use_trt_backend()
table_option.use_trt_backend()
# If use TRT backend, the dynamic shape will be set as follow.
# We recommend that users set the length and height of the detection model to a multiple of 32.
# We also recommend that users set the Trt input shape as follow.
det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640],
[1, 3, 960, 960])
rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
[args.rec_bs, 3, 48, 320],
[args.rec_bs, 3, 48, 2304])
table_option.set_trt_input_shape("x", [1, 3, 488, 488])
# Users could save TRT cache file to disk as follow.
det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt")
rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
table_option.set_trt_cache_file(args.table_model +
"/table_trt_cache.trt")
elif args.backend.lower() == "ort":
det_option.use_ort_backend()
rec_option.use_ort_backend()
table_option.use_ort_backend()
elif args.backend.lower() == "paddle":
det_option.use_paddle_infer_backend()
rec_option.use_paddle_infer_backend()
table_option.use_paddle_infer_backend()
elif args.backend.lower() == "openvino":
assert args.device.lower(
) == "cpu", "OpenVINO backend require inference on device CPU."
det_option.use_openvino_backend()
rec_option.use_openvino_backend()
table_option.use_openvino_backend()
return det_option, rec_option, table_option
args = parse_arguments()
det_model_file = os.path.join(args.det_model, "inference.pdmodel")
det_params_file = os.path.join(args.det_model, "inference.pdiparams")
rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
rec_label_file = args.rec_label_file
table_model_file = os.path.join(args.table_model, "inference.pdmodel")
table_params_file = os.path.join(args.table_model, "inference.pdiparams")
table_char_dict_path = args.table_char_dict_path
# Set the runtime option
det_option, rec_option, table_option = build_option(args)
det_model = fd.vision.ocr.DBDetector(
det_model_file, det_params_file, runtime_option=det_option)
rec_model = fd.vision.ocr.Recognizer(
rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option)
table_model = fd.vision.ocr.StructureV2Table(
table_model_file,
table_params_file,
table_char_dict_path,
runtime_option=table_option)
det_model.preprocessor.max_side_len = 960
det_model.postprocessor.det_db_thresh = 0.3
det_model.postprocessor.det_db_box_thresh = 0.6
det_model.postprocessor.det_db_unclip_ratio = 1.5
det_model.postprocessor.det_db_score_mode = "slow"
det_model.postprocessor.use_dilation = False
ppstructurev2_table = fd.vision.ocr.PPStructureV2Table(
det_model=det_model, rec_model=rec_model, table_model=table_model)
ppstructurev2_table.rec_batch_size = args.rec_bs
# Read the input image
im = cv2.imread(args.image)
# Predict and return the results
result = ppstructurev2_table.predict(im)
print(result)
# Visualize the results.
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result saved in ./visualized_result.jpg")

View File

@@ -0,0 +1,77 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import fastdeploy as fd
import cv2
import os
def parse_arguments():
import argparse
parser = argparse.ArgumentParser()
parser.add_argument(
"--table_model",
required=True,
help="Path of Table recognition model of PPOCR.")
parser.add_argument(
"--table_char_dict_path",
type=str,
required=True,
help="tabel recognition dict path.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--device_id",
type=int,
default=0,
help="Define which GPU card used to run model.")
return parser.parse_args()
def build_option(args):
table_option = fd.RuntimeOption()
if args.device.lower() == "gpu":
table_option.use_gpu(args.device_id)
return table_option
args = parse_arguments()
table_model_file = os.path.join(args.table_model, "inference.pdmodel")
table_params_file = os.path.join(args.table_model, "inference.pdiparams")
# Set the runtime option
table_option = build_option(args)
# Create the table_model
table_model = fd.vision.ocr.StructureV2Table(
table_model_file, table_params_file, args.table_char_dict_path,
table_option)
# Read the image
im = cv2.imread(args.image)
# Predict and return the results
result = table_model.predict(im)
print(result)

View File

@@ -53,8 +53,10 @@
#include "fastdeploy/vision/matting/ppmatting/ppmatting.h"
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
#include "fastdeploy/vision/segmentation/ppseg/model.h"

fastdeploy/vision/common/result.cc Executable file → Normal file
View File

@@ -649,6 +649,32 @@ std::string OCRResult::Str() {
}
out = out + "\n";
}
if (table_boxes.size() > 0 && table_structure.size() > 0) {
for (int n = 0; n < table_boxes.size(); n++) {
out = out + "table boxes: [";
for (int i = 0; i < 4; i++) {
out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," +
std::to_string(table_boxes[n][i * 2 + 1]) + "]";
if (i != 3) {
out = out + ",";
}
}
out = out + "]";
}
out = out + "\ntable structure: ";
for (int m = 0; m < table_structure.size(); m++) {
out += table_structure[m];
}
if (!table_html.empty()) {
out = out + "\n" + "table html: " + table_html;
}
}
return out;
} else if (boxes.size() == 0 && rec_scores.size() > 0 &&
@@ -680,6 +706,31 @@ std::string OCRResult::Str() {
out = out + "\n";
}
return out;
} else if (boxes.size() == 0 && table_boxes.size() > 0 &&
table_structure.size() > 0) {
std::string out;
for (int n = 0; n < table_boxes.size(); n++) {
out = out + ", table boxes: [";
for (int i = 0; i < 4; i++) {
out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," +
std::to_string(table_boxes[n][i * 2 + 1]) + "]";
if (i != 3) {
out = out + ",";
}
}
out = out + "]";
}
out = out + "\ntable structure: ";
for (int m = 0; m < table_structure.size(); m++) {
out += table_structure[m];
}
if (!table_html.empty()) {
out = out + "\n" + "table html: " + table_html;
}
return out;
}
no_result = no_result + "No Results!";

View File

@@ -216,6 +216,10 @@ struct FASTDEPLOY_DECL OCRResult : public BaseResult {
std::vector<float> cls_scores;
std::vector<int32_t> cls_labels;
std::vector<std::array<int, 8>> table_boxes;
std::vector<std::string> table_structure;
std::string table_html;
ResultType type = ResultType::OCR;
void Clear();
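For orientation, a short hedged C++ sketch of consuming these new fields after a prediction; the helper function name is illustrative only, and the box layout (x1,y1, x2,y1, x2,y2, x1,y2) follows the convention used elsewhere in this PR.
```c++
#include <iostream>
#include <string>

#include "fastdeploy/vision.h"

// Illustrative helper (not part of this PR): print the table fields of an
// OCRResult filled by StructureV2Table::Predict or PPStructureV2Table::Predict.
void PrintTableResult(const fastdeploy::vision::OCRResult& result) {
  for (size_t i = 0; i < result.table_boxes.size(); ++i) {
    const auto& box = result.table_boxes[i];  // x1,y1, x2,y1, x2,y2, x1,y2
    std::cout << "cell " << i << ": (" << box[0] << "," << box[1] << ") .. ("
              << box[4] << "," << box[5] << ")\n";
  }
  std::string structure;
  for (const auto& token : result.table_structure) {
    structure += token;  // e.g. "<tr>", "<td>", "</td>"
  }
  std::cout << "table structure: " << structure << "\n";
  if (!result.table_html.empty()) {
    std::cout << "table html: " << result.table_html << "\n";
  }
}
```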

View File

@@ -19,11 +19,13 @@ namespace fastdeploy {
void BindPPOCRModel(pybind11::module& m);
void BindPPOCRv3(pybind11::module& m);
void BindPPOCRv2(pybind11::module& m);
void BindPPStructureV2Table(pybind11::module& m);
void BindOcr(pybind11::module& m) {
auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models");
BindPPOCRModel(ocr_module);
BindPPOCRv3(ocr_module);
BindPPOCRv2(ocr_module);
BindPPStructureV2Table(ocr_module);
}
} // namespace fastdeploy

View File

@@ -321,5 +321,94 @@ void BindPPOCRModel(pybind11::module& m) {
self.BatchPredict(images, &ocr_result);
return ocr_result;
});
// Table
pybind11::class_<vision::ocr::StructureV2TablePreprocessor,
vision::ProcessorManager>(m, "StructureV2TablePreprocessor")
.def(pybind11::init<>())
.def("run", [](vision::ocr::StructureV2TablePreprocessor& self,
std::vector<pybind11::array>& im_list) {
std::vector<vision::FDMat> images;
for (size_t i = 0; i < im_list.size(); ++i) {
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
}
std::vector<FDTensor> outputs;
if (!self.Run(&images, &outputs)) {
throw std::runtime_error(
"Failed to preprocess the input data in "
"StructureV2TablePreprocessor.");
}
auto batch_det_img_info = self.GetBatchImgInfo();
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i].StopSharing();
}
return std::make_pair(outputs, *batch_det_img_info);
});
pybind11::class_<vision::ocr::StructureV2TablePostprocessor>(
m, "StructureV2TablePostprocessor")
.def(pybind11::init<std::string>())
.def("run",
[](vision::ocr::StructureV2TablePostprocessor& self,
std::vector<FDTensor>& inputs,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
std::vector<std::vector<std::array<int, 8>>> boxes;
std::vector<std::vector<std::string>> structure_list;
if (!self.Run(inputs, &boxes, &structure_list,
batch_det_img_info)) {
throw std::runtime_error(
"Failed to preprocess the input data in "
"StructureV2TablePostprocessor.");
}
return std::make_pair(boxes, structure_list);
})
.def("run",
[](vision::ocr::StructureV2TablePostprocessor& self,
std::vector<pybind11::array>& input_array,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
std::vector<FDTensor> inputs;
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
std::vector<std::vector<std::array<int, 8>>> boxes;
std::vector<std::vector<std::string>> structure_list;
if (!self.Run(inputs, &boxes, &structure_list,
batch_det_img_info)) {
throw std::runtime_error(
"Failed to preprocess the input data in "
"StructureV2TablePostprocessor.");
}
return std::make_pair(boxes, structure_list);
});
pybind11::class_<vision::ocr::StructureV2Table, FastDeployModel>(
m, "StructureV2Table")
.def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
ModelFormat>())
.def(pybind11::init<>())
.def_property_readonly("preprocessor",
&vision::ocr::StructureV2Table::GetPreprocessor)
.def_property_readonly("postprocessor",
&vision::ocr::StructureV2Table::GetPostprocessor)
.def("predict",
[](vision::ocr::StructureV2Table& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult ocr_result;
self.Predict(mat, &ocr_result);
return ocr_result;
})
.def("batch_predict", [](vision::ocr::StructureV2Table& self,
std::vector<pybind11::array>& data) {
std::vector<cv::Mat> images;
for (size_t i = 0; i < data.size(); ++i) {
images.push_back(PyArrayToCvMat(data[i]));
}
std::vector<vision::OCRResult> ocr_results;
self.BatchPredict(images, &ocr_results);
return ocr_results;
});
}
} // namespace fastdeploy

fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc Executable file → Normal file
View File

@@ -12,32 +12,33 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pybind11/stl.h>
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindPPOCRv3(pybind11::module& m) {
// PPOCRv3
pybind11::class_<pipeline::PPOCRv3, FastDeployModel>(
m, "PPOCRv3")
pybind11::class_<pipeline::PPOCRv3, FastDeployModel>(m, "PPOCRv3")
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Classifier*,
fastdeploy::vision::ocr::Recognizer*>())
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*>())
.def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize, &pipeline::PPOCRv3::SetClsBatchSize)
.def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize, &pipeline::PPOCRv3::SetRecBatchSize)
.def("clone", [](pipeline::PPOCRv3& self) {
return self.Clone();
})
.def("predict", [](pipeline::PPOCRv3& self,
pybind11::array& data) {
.def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize,
&pipeline::PPOCRv3::SetClsBatchSize)
.def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize,
&pipeline::PPOCRv3::SetRecBatchSize)
.def("clone", [](pipeline::PPOCRv3& self) { return self.Clone(); })
.def("predict",
[](pipeline::PPOCRv3& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;
self.Predict(&mat, &res);
return res;
})
.def("batch_predict", [](pipeline::PPOCRv3& self, std::vector<pybind11::array>& data) {
.def("batch_predict",
[](pipeline::PPOCRv3& self, std::vector<pybind11::array>& data) {
std::vector<cv::Mat> images;
for (size_t i = 0; i < data.size(); ++i) {
images.push_back(PyArrayToCvMat(data[i]));
@@ -50,26 +51,57 @@ void BindPPOCRv3(pybind11::module& m) {
void BindPPOCRv2(pybind11::module& m) {
// PPOCRv2
pybind11::class_<pipeline::PPOCRv2, FastDeployModel>(
m, "PPOCRv2")
pybind11::class_<pipeline::PPOCRv2, FastDeployModel>(m, "PPOCRv2")
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Classifier*,
fastdeploy::vision::ocr::Recognizer*>())
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*>())
.def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize, &pipeline::PPOCRv2::SetClsBatchSize)
.def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize, &pipeline::PPOCRv2::SetRecBatchSize)
.def("clone", [](pipeline::PPOCRv2& self) {
return self.Clone();
})
.def("predict", [](pipeline::PPOCRv2& self,
pybind11::array& data) {
.def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize,
&pipeline::PPOCRv2::SetClsBatchSize)
.def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize,
&pipeline::PPOCRv2::SetRecBatchSize)
.def("clone", [](pipeline::PPOCRv2& self) { return self.Clone(); })
.def("predict",
[](pipeline::PPOCRv2& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;
self.Predict(&mat, &res);
return res;
})
.def("batch_predict", [](pipeline::PPOCRv2& self, std::vector<pybind11::array>& data) {
.def("batch_predict",
[](pipeline::PPOCRv2& self, std::vector<pybind11::array>& data) {
std::vector<cv::Mat> images;
for (size_t i = 0; i < data.size(); ++i) {
images.push_back(PyArrayToCvMat(data[i]));
}
std::vector<vision::OCRResult> results;
self.BatchPredict(images, &results);
return results;
});
}
void BindPPStructureV2Table(pybind11::module& m) {
// PPStructureV2Table
pybind11::class_<pipeline::PPStructureV2Table, FastDeployModel>(
m, "PPStructureV2Table")
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*,
fastdeploy::vision::ocr::StructureV2Table*>())
.def_property("rec_batch_size",
&pipeline::PPStructureV2Table::GetRecBatchSize,
&pipeline::PPStructureV2Table::SetRecBatchSize)
.def("clone",
[](pipeline::PPStructureV2Table& self) { return self.Clone(); })
.def("predict",
[](pipeline::PPStructureV2Table& self, pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;
self.Predict(&mat, &res);
return res;
})
.def("batch_predict", [](pipeline::PPStructureV2Table& self,
std::vector<pybind11::array>& data) {
std::vector<cv::Mat> images;
for (size_t i = 0; i < data.size(); ++i) {
images.push_back(PyArrayToCvMat(data[i]));


@@ -0,0 +1,233 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace pipeline {
PPStructureV2Table::PPStructureV2Table(
fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model,
fastdeploy::vision::ocr::StructureV2Table* table_model)
: detector_(det_model), recognizer_(rec_model), table_(table_model) {
Initialized();
}
bool PPStructureV2Table::SetRecBatchSize(int rec_batch_size) {
if (rec_batch_size < -1 || rec_batch_size == 0) {
FDERROR << "batch_size > 0 or batch_size == -1." << std::endl;
return false;
}
rec_batch_size_ = rec_batch_size;
return true;
}
int PPStructureV2Table::GetRecBatchSize() { return rec_batch_size_; }
bool PPStructureV2Table::Initialized() const {
if (detector_ != nullptr && !detector_->Initialized()) {
return false;
}
if (recognizer_ != nullptr && !recognizer_->Initialized()) {
return false;
}
if (table_ != nullptr && !table_->Initialized()) {
return false;
}
return true;
}
std::unique_ptr<PPStructureV2Table> PPStructureV2Table::Clone() const {
std::unique_ptr<PPStructureV2Table> clone_model =
utils::make_unique<PPStructureV2Table>(PPStructureV2Table(*this));
clone_model->detector_ = detector_->Clone().release();
clone_model->recognizer_ = recognizer_->Clone().release();
clone_model->table_ = table_->Clone().release();
return clone_model;
}
bool PPStructureV2Table::Predict(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
return Predict(*img, result);
}
bool PPStructureV2Table::Predict(const cv::Mat& img,
fastdeploy::vision::OCRResult* result) {
std::vector<fastdeploy::vision::OCRResult> batch_result(1);
bool success = BatchPredict({img}, &batch_result);
if (!success) {
return success;
}
*result = std::move(batch_result[0]);
return true;
};
bool PPStructureV2Table::BatchPredict(
const std::vector<cv::Mat>& images,
std::vector<fastdeploy::vision::OCRResult>* batch_result) {
batch_result->clear();
batch_result->resize(images.size());
std::vector<std::vector<std::array<int, 8>>> batch_boxes(images.size());
if (!detector_->BatchPredict(images, &batch_boxes)) {
FDERROR << "There's error while detecting image in PPOCR." << std::endl;
return false;
}
for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) {
vision::ocr::SortBoxes(&(batch_boxes[i_batch]));
(*batch_result)[i_batch].boxes = batch_boxes[i_batch];
}
for (int i_batch = 0; i_batch < images.size(); ++i_batch) {
fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch];
// Get cropped images from the detection result
const std::vector<std::array<int, 8>>& boxes = ocr_result.boxes;
const cv::Mat& img = images[i_batch];
std::vector<cv::Mat> image_list;
if (boxes.size() == 0) {
image_list.emplace_back(img);
} else {
image_list.resize(boxes.size());
for (size_t i_box = 0; i_box < boxes.size(); ++i_box) {
image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]);
}
}
std::vector<int32_t>* cls_labels_ptr = &ocr_result.cls_labels;
std::vector<float>* cls_scores_ptr = &ocr_result.cls_scores;
std::vector<std::string>* text_ptr = &ocr_result.text;
std::vector<float>* rec_scores_ptr = &ocr_result.rec_scores;
std::vector<float> width_list;
for (int i = 0; i < image_list.size(); i++) {
width_list.push_back(float(image_list[i].cols) / image_list[i].rows);
}
std::vector<int> indices = vision::ocr::ArgSort(width_list);
for (size_t start_index = 0; start_index < image_list.size();
start_index += rec_batch_size_) {
size_t end_index =
std::min(start_index + rec_batch_size_, image_list.size());
if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr,
start_index, end_index, indices)) {
FDERROR << "There's error while recognizing image in PPOCR."
<< std::endl;
return false;
}
}
}
if (!table_->BatchPredict(images, batch_result)) {
FDERROR << "There's error while recognizing tables in images." << std::endl;
return false;
}
for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) {
fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch];
std::vector<std::vector<std::string>> matched(ocr_result.table_boxes.size(),
std::vector<std::string>());
std::vector<int> ocr_box;
std::vector<int> structure_box;
for (int i = 0; i < ocr_result.boxes.size(); i++) {
ocr_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.boxes[i]);
ocr_box[0] -= 1;
ocr_box[1] -= 1;
ocr_box[2] += 1;
ocr_box[3] += 1;
std::vector<std::vector<float>> dis_list(ocr_result.table_boxes.size(),
std::vector<float>(3, 100000.0));
for (int j = 0; j < ocr_result.table_boxes.size(); j++) {
structure_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.table_boxes[j]);
dis_list[j][0] = vision::ocr::Dis(ocr_box, structure_box);
dis_list[j][1] = 1 - vision::ocr::Iou(ocr_box, structure_box);
dis_list[j][2] = j;
}
// find min dis idx
std::sort(dis_list.begin(), dis_list.end(), vision::ocr::ComparisonDis);
matched[dis_list[0][2]].push_back(ocr_result.text[i]);
}
// get pred html
std::string html_str = "";
int td_tag_idx = 0;
auto structure_html_tags = ocr_result.table_structure;
for (int i = 0; i < structure_html_tags.size(); i++) {
if (structure_html_tags[i].find("</td>") != std::string::npos) {
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
html_str += "<td>";
}
if (matched[td_tag_idx].size() > 0) {
bool b_with = false;
if (matched[td_tag_idx][0].find("<b>") != std::string::npos &&
matched[td_tag_idx].size() > 1) {
b_with = true;
html_str += "<b>";
}
for (int j = 0; j < matched[td_tag_idx].size(); j++) {
std::string content = matched[td_tag_idx][j];
if (matched[td_tag_idx].size() > 1) {
// remove blank, <b> and </b>
if (content.length() > 0 && content.at(0) == ' ') {
content = content.substr(1);  // drop the leading blank
}
if (content.length() > 2 && content.substr(0, 3) == "<b>") {
content = content.substr(3);
}
if (content.length() > 4 &&
content.substr(content.length() - 4) == "</b>") {
content = content.substr(0, content.length() - 4);
}
if (content.empty()) {
continue;
}
// add blank
if (j != matched[td_tag_idx].size() - 1 &&
content.at(content.length() - 1) != ' ') {
content += ' ';
}
}
html_str += content;
}
if (b_with) {
html_str += "</b>";
}
}
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
html_str += "</td>";
} else {
html_str += structure_html_tags[i];
}
td_tag_idx += 1;
} else {
html_str += structure_html_tags[i];
}
}
(*batch_result)[i_batch].table_html = html_str;
}
return true;
}
} // namespace pipeline
} // namespace fastdeploy
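
The matching loop above assigns each recognized text box to the structure cell with the smallest (1 - IoU, corner distance) pair. Below is a standalone sketch of just that criterion, reusing the ocr_utils helpers with made-up coordinates; it is an illustration, not part of the committed sources.

#include <algorithm>
#include <array>
#include <iostream>
#include <vector>
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"

int main() {
  namespace ocr = fastdeploy::vision::ocr;
  // Hypothetical quadrilaterals: one recognized text box and two table cells.
  std::array<int, 8> text_quad = {12, 10, 80, 10, 80, 30, 12, 30};
  std::array<int, 8> cell0 = {0, 0, 100, 0, 100, 40, 0, 40};      // overlaps the text box
  std::array<int, 8> cell1 = {100, 0, 200, 0, 200, 40, 100, 40};  // to the right of it
  std::vector<std::array<int, 8>> cell_quads = {cell0, cell1};

  std::vector<int> text_box = ocr::Xyxyxyxy2Xyxy(text_quad);
  // Note: the pipeline above additionally pads the text box by one pixel per side.
  std::vector<std::vector<float>> dis_list;
  for (int j = 0; j < static_cast<int>(cell_quads.size()); ++j) {
    std::vector<int> cell_box = ocr::Xyxyxyxy2Xyxy(cell_quads[j]);
    // Same ranking triple as BatchPredict: {corner distance, 1 - IoU, cell index}.
    dis_list.push_back({ocr::Dis(text_box, cell_box),
                        1.0f - ocr::Iou(text_box, cell_box),
                        static_cast<float>(j)});
  }
  std::sort(dis_list.begin(), dis_list.end(), ocr::ComparisonDis);
  std::cout << "matched cell index: " << int(dis_list[0][2]) << std::endl;  // expects 0
  return 0;
}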


@@ -0,0 +1,93 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/vision/common/processors/transform.h"
#include "fastdeploy/vision/common/result.h"
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
#include "fastdeploy/utils/unique_ptr.h"
namespace fastdeploy {
/** \brief This pipeline can launch the text detection model, text recognition model and table structure model sequentially. All OCR pipeline APIs are defined inside this namespace.
*
*/
namespace pipeline {
/*! @brief PPStructureV2Table is used to load the PP-StructureV2 table recognition pipeline models provided by PaddleOCR.
*/
class FASTDEPLOY_DECL PPStructureV2Table : public FastDeployModel {
public:
/** \brief Set up the text detection model, text recognition model and table structure model respectively.
*
* \param[in] det_model Initialized DBDetector model object, e.g loaded from ./ch_PP-OCRv2_det_infer
* \param[in] rec_model Initialized Recognizer model object, e.g loaded from ./ch_PP-OCRv2_rec_infer
* \param[in] table_model Initialized StructureV2Table model object, e.g loaded from ./en_ppstructure_mobile_v2.0_SLANet_infer
*/
PPStructureV2Table(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model,
fastdeploy::vision::ocr::StructureV2Table* table_model);
/** \brief Clone a new PPStructureV2Table with less memory usage when multiple instances of the same model are created
*
* \return new PPStructureV2Table* type unique pointer
*/
std::unique_ptr<PPStructureV2Table> Clone() const;
/** \brief Predict the input image and get OCR result.
*
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] result The output OCR result will be written to this structure.
* \return true if the prediction succeeds, otherwise false.
*/
virtual bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
virtual bool Predict(const cv::Mat& img,
fastdeploy::vision::OCRResult* result);
/** \brief BatchPredict the input images and get OCR results.
*
* \param[in] images The list of input image data, each comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] batch_result The output list of OCR results will be written to this structure.
* \return true if the prediction succeeds, otherwise false.
*/
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
std::vector<fastdeploy::vision::OCRResult>* batch_result);
bool Initialized() const override;
bool SetRecBatchSize(int rec_batch_size);
int GetRecBatchSize();
protected:
fastdeploy::vision::ocr::DBDetector* detector_ = nullptr;
fastdeploy::vision::ocr::Recognizer* recognizer_ = nullptr;
fastdeploy::vision::ocr::StructureV2Table* table_ = nullptr;
private:
int rec_batch_size_ = 6;
};
namespace application {
namespace ocrsystem {
typedef pipeline::PPStructureV2Table PPStructureV2TableSystem;
} // namespace ocrsystem
} // namespace application
} // namespace pipeline
} // namespace fastdeploy
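
For reference, a minimal usage sketch of the pipeline declared above. The detection/recognition model directories, the dictionary file names and the test image are assumptions for illustration only; the table model directory matches the one named in this commit.

#include <iostream>
#include "fastdeploy/vision.h"

int main() {
  namespace ocr = fastdeploy::vision::ocr;
  fastdeploy::RuntimeOption option;  // default CPU backend

  ocr::DBDetector det("en_PP-OCRv3_det_infer/model.pdmodel",
                      "en_PP-OCRv3_det_infer/model.pdiparams", option);
  ocr::Recognizer rec("en_PP-OCRv3_rec_infer/model.pdmodel",
                      "en_PP-OCRv3_rec_infer/model.pdiparams",
                      "en_dict.txt", option);
  ocr::StructureV2Table table(
      "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel",
      "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams",
      "table_structure_dict.txt", option);

  fastdeploy::pipeline::PPStructureV2Table pipe(&det, &rec, &table);
  cv::Mat img = cv::imread("table.jpg");
  fastdeploy::vision::OCRResult result;
  if (pipe.Predict(img, &result)) {
    std::cout << result.table_html << std::endl;  // reconstructed table HTML
  }
  return 0;
}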


@@ -0,0 +1,133 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
StructureV2Table::StructureV2Table() {}
StructureV2Table::StructureV2Table(const std::string& model_file,
const std::string& params_file,
const std::string& table_char_dict_path,
const RuntimeOption& custom_option,
const ModelFormat& model_format)
: postprocessor_(table_char_dict_path) {
if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
valid_gpu_backends = {Backend::ORT, Backend::TRT};
} else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_kunlunxin_backends = {Backend::LITE};
valid_ascend_backends = {Backend::LITE};
valid_sophgonpu_backends = {Backend::SOPHGOTPU};
valid_rknpu_backends = {Backend::RKNPU2};
}
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
runtime_option.params_file = params_file;
initialized = Initialize();
}
// Init
bool StructureV2Table::Initialize() {
if (!InitRuntime()) {
FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
return false;
}
return true;
}
std::unique_ptr<StructureV2Table> StructureV2Table::Clone() const {
std::unique_ptr<StructureV2Table> clone_model =
utils::make_unique<StructureV2Table>(StructureV2Table(*this));
clone_model->SetRuntime(clone_model->CloneRuntime());
return clone_model;
}
bool StructureV2Table::Predict(const cv::Mat& img,
std::vector<std::array<int, 8>>* boxes_result,
std::vector<std::string>* structure_result) {
std::vector<std::vector<std::array<int, 8>>> det_results;
std::vector<std::vector<std::string>> structure_results;
if (!BatchPredict({img}, &det_results, &structure_results)) {
return false;
}
*boxes_result = std::move(det_results[0]);
*structure_result = std::move(structure_results[0]);
return true;
}
bool StructureV2Table::Predict(const cv::Mat& img,
vision::OCRResult* ocr_result) {
if (!Predict(img, &(ocr_result->table_boxes),
&(ocr_result->table_structure))) {
return false;
}
return true;
}
bool StructureV2Table::BatchPredict(
const std::vector<cv::Mat>& images,
std::vector<vision::OCRResult>* ocr_results) {
std::vector<std::vector<std::array<int, 8>>> det_results;
std::vector<std::vector<std::string>> structure_results;
if (!BatchPredict(images, &det_results, &structure_results)) {
return false;
}
ocr_results->resize(det_results.size());
for (int i = 0; i < det_results.size(); i++) {
(*ocr_results)[i].table_boxes = std::move(det_results[i]);
(*ocr_results)[i].table_structure = std::move(structure_results[i]);
}
return true;
}
bool StructureV2Table::BatchPredict(
const std::vector<cv::Mat>& images,
std::vector<std::vector<std::array<int, 8>>>* det_results,
std::vector<std::vector<std::string>>* structure_results) {
std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
FDERROR << "Failed to preprocess input image." << std::endl;
return false;
}
auto batch_det_img_info = preprocessor_.GetBatchImgInfo();
reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
FDERROR << "Failed to inference by runtime." << std::endl;
return false;
}
if (!postprocessor_.Run(reused_output_tensors_, det_results,
structure_results, *batch_det_img_info)) {
FDERROR << "Failed to postprocess the inference cls_results by runtime."
<< std::endl;
return false;
}
return true;
}
} // namespace ocr
} // namespace vision
} // namespace fastdeploy


@@ -0,0 +1,113 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/vision/common/processors/transform.h"
#include "fastdeploy/vision/common/result.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h"
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h"
#include "fastdeploy/utils/unique_ptr.h"
namespace fastdeploy {
namespace vision {
/** \brief All OCR series model APIs are defined inside this namespace
*
*/
namespace ocr {
/*! @brief StructureV2Table object is used to load the table structure recognition model (e.g SLANet) provided by PP-StructureV2.
*/
class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel {
public:
StructureV2Table();
/** \brief Set path of model file, and the configuration of runtime
*
* \param[in] model_file Path of model file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel.
* \param[in] params_file Path of parameter file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
* \param[in] table_char_dict_path Path of the table structure dictionary file, e.g ./table_structure_dict.txt.
* \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
* \param[in] model_format Model format of the loaded model, default is Paddle format.
*/
StructureV2Table(const std::string& model_file,
const std::string& params_file = "",
const std::string& table_char_dict_path = "",
const RuntimeOption& custom_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::PADDLE);
/** \brief Clone a new StructureV2Table model with less memory usage when multiple instances of the same model are created
*
* \return new StructureV2Table* type unique pointer
*/
virtual std::unique_ptr<StructureV2Table> Clone() const;
/// Get model's name
std::string ModelName() const { return "ppocr/ocr_table"; }
/** \brief Predict the input image and get the table structure recognition result.
*
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] boxes_result The detected table cell boxes will be written to this structure.
* \param[in] structure_result The predicted table structure HTML tokens will be written to this structure.
* \return true if the prediction succeeds, otherwise false.
*/
virtual bool Predict(const cv::Mat& img,
std::vector<std::array<int, 8>>* boxes_result,
std::vector<std::string>* structure_result);
/** \brief Predict the input image and get the table structure recognition result.
*
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] ocr_result The detected table cell boxes and structure tokens will be written to this OCRResult structure.
* \return true if the prediction succeeds, otherwise false.
*/
virtual bool Predict(const cv::Mat& img, vision::OCRResult* ocr_result);
/** \brief BatchPredict the input images and get table structure recognition results.
*
* \param[in] images The list of input image data, each comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] det_results The detected table cell boxes of each image will be written to this structure.
* \param[in] structure_results The predicted table structure tokens of each image will be written to this structure.
* \return true if the prediction succeeds, otherwise false.
*/
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::vector<std::array<int, 8>>>* det_results,
std::vector<std::vector<std::string>>* structure_results);
/** \brief BatchPredict the input images and get table structure recognition results.
*
* \param[in] images The list of input image data, each comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
* \param[in] ocr_results The output list of OCRResult, each with table cell boxes and structure tokens.
* \return true if the prediction succeeds, otherwise false.
*/
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
std::vector<vision::OCRResult>* ocr_results);
/// Get preprocessor reference of StructureV2TablePreprocessor
virtual StructureV2TablePreprocessor& GetPreprocessor() {
return preprocessor_;
}
/// Get postprocessor reference of StructureV2TablePostprocessor
virtual StructureV2TablePostprocessor& GetPostprocessor() {
return postprocessor_;
}
private:
bool Initialize();
StructureV2TablePreprocessor preprocessor_;
StructureV2TablePostprocessor postprocessor_;
};
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
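
A minimal sketch of calling the standalone table model declared above; the model directory, dictionary file and image name are illustrative assumptions.

#include <array>
#include <iostream>
#include <string>
#include <vector>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::vision::ocr::StructureV2Table table(
      "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel",
      "en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams",
      "table_structure_dict.txt");

  cv::Mat img = cv::imread("table.jpg");
  std::vector<std::array<int, 8>> cell_boxes;  // 8-value cell corners
  std::vector<std::string> structure_tags;     // HTML token stream
  if (table.Predict(img, &cell_boxes, &structure_tags)) {
    std::cout << "cells: " << cell_boxes.size() << std::endl;
    for (const auto& tag : structure_tags) std::cout << tag;
    std::cout << std::endl;
  }
  return 0;
}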


@@ -0,0 +1,170 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
StructureV2TablePostprocessor::StructureV2TablePostprocessor() {
initialized_ = false;
}
StructureV2TablePostprocessor::StructureV2TablePostprocessor(
const std::string& dict_path) {
std::ifstream in(dict_path);
FDASSERT(in, "Cannot open file %s to read.", dict_path.c_str());
std::string line;
dict_character.clear();
dict_character.push_back("sos"); // add special character
while (getline(in, line)) {
dict_character.push_back(line);
}
if (merge_no_span_structure) {
if (std::find(dict_character.begin(), dict_character.end(), "<td></td>") ==
dict_character.end()) {
dict_character.push_back("<td></td>");
}
for (auto it = dict_character.begin(); it != dict_character.end();) {
if (*it == "<td>") {
it = dict_character.erase(it);
} else {
++it;
}
}
}
dict_character.push_back("eos"); // add special character
dict.clear();
for (size_t i = 0; i < dict_character.size(); i++) {
dict[dict_character[i]] = int(i);
if (dict_character[i] == "beg") {
ignore_beg_token_idx = i;
} else if (dict_character[i] == "end") {
ignore_end_token_idx = i;
}
}
dict_end_idx = dict_character.size() - 1;
initialized_ = true;
}
bool StructureV2TablePostprocessor::SingleBatchPostprocessor(
const float* structure_probs, const float* bbox_preds, size_t slice_dim,
size_t prob_dim, size_t box_dim, int img_width, int img_height,
std::vector<std::array<int, 8>>* boxes_result,
std::vector<std::string>* structure_list_result) {
structure_list_result->push_back("<html>");
structure_list_result->push_back("<body>");
structure_list_result->push_back("<table>");
for (int i = 0; i < slice_dim; i++) {
int structure_idx = 0;
float structure_prob = structure_probs[i * prob_dim];
for (int j = 0; j < prob_dim; j++) {
if (structure_probs[i * prob_dim + j] > structure_prob) {
structure_prob = structure_probs[i * prob_dim + j];
structure_idx = j;
}
}
if (structure_idx > 0 && structure_idx == dict_end_idx) break;
if (structure_idx == ignore_end_token_idx ||
structure_idx == ignore_beg_token_idx)
continue;
std::string text = dict_character[structure_idx];
if (std::find(td_tokens.begin(), td_tokens.end(), text) !=
td_tokens.end()) {
std::array<int, 8> bbox;
// box dim: en->4, ch->8
if (box_dim == 4) {
bbox[0] = bbox_preds[i * box_dim] * img_width;
bbox[1] = bbox_preds[i * box_dim + 1] * img_height;
bbox[2] = bbox_preds[i * box_dim + 2] * img_width;
bbox[3] = bbox_preds[i * box_dim + 1] * img_height;
bbox[4] = bbox_preds[i * box_dim + 2] * img_width;
bbox[5] = bbox_preds[i * box_dim + 3] * img_height;
bbox[6] = bbox_preds[i * box_dim] * img_width;
bbox[7] = bbox_preds[i * box_dim + 3] * img_height;
} else {
for (int k = 0; k < 8; k++) {
float bbox_pred = bbox_preds[i * box_dim + k];
bbox[k] =
int(k % 2 == 0 ? bbox_pred * img_width : bbox_pred * img_height);
}
}
boxes_result->push_back(bbox);
}
structure_list_result->push_back(text);
}
structure_list_result->push_back("</table>");
structure_list_result->push_back("</body>");
structure_list_result->push_back("</html>");
return true;
}
bool StructureV2TablePostprocessor::Run(
const std::vector<FDTensor>& tensors,
std::vector<std::vector<std::array<int, 8>>>* bbox_batch_list,
std::vector<std::vector<std::string>>* structure_batch_list,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
// Table have 2 output tensors.
const FDTensor& structure_probs = tensors[1];
const FDTensor& bbox_preds = tensors[0];
const float* structure_probs_data =
reinterpret_cast<const float*>(structure_probs.Data());
size_t structure_probs_length =
accumulate(structure_probs.shape.begin() + 1, structure_probs.shape.end(),
1, std::multiplies<int>());
const float* bbox_preds_data =
reinterpret_cast<const float*>(bbox_preds.Data());
size_t bbox_preds_length =
accumulate(bbox_preds.shape.begin() + 1, bbox_preds.shape.end(), 1,
std::multiplies<int>());
size_t batch = bbox_preds.shape[0];
size_t slice_dim = bbox_preds.shape[1];
size_t prob_dim = structure_probs.shape[2];
size_t box_dim = bbox_preds.shape[2];
bbox_batch_list->resize(batch);
structure_batch_list->resize(batch);
for (int i_batch = 0; i_batch < batch; ++i_batch) {
SingleBatchPostprocessor(
structure_probs_data, bbox_preds_data, slice_dim, prob_dim, box_dim,
batch_det_img_info[i_batch][0], batch_det_img_info[i_batch][1],
&bbox_batch_list->at(i_batch), &structure_batch_list->at(i_batch));
structure_probs_data = structure_probs_data + structure_probs_length;
bbox_preds_data = bbox_preds_data + bbox_preds_length;
}
return true;
}
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
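
For the English SLANet model the regression head emits 4-value boxes (normalized x1, y1, x2, y2), which SingleBatchPostprocessor above expands to the 8-value corner layout used everywhere else. A small worked sketch of that expansion with hypothetical values:

#include <array>
#include <cstdio>

int main() {
  // Hypothetical normalized prediction and original image size.
  const float pred[4] = {0.10f, 0.20f, 0.50f, 0.40f};  // x1, y1, x2, y2
  const int img_width = 600, img_height = 400;
  std::array<int, 8> bbox;
  bbox[0] = int(pred[0] * img_width);   // 60  -> top-left x
  bbox[1] = int(pred[1] * img_height);  // 80  -> top-left y
  bbox[2] = int(pred[2] * img_width);   // 300 -> top-right x
  bbox[3] = int(pred[1] * img_height);  // 80  -> top-right y
  bbox[4] = int(pred[2] * img_width);   // 300 -> bottom-right x
  bbox[5] = int(pred[3] * img_height);  // 160 -> bottom-right y
  bbox[6] = int(pred[0] * img_width);   // 60  -> bottom-left x
  bbox[7] = int(pred[3] * img_height);  // 160 -> bottom-left y
  for (int v : bbox) std::printf("%d ", v);
  std::printf("\n");
  return 0;
}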


@@ -0,0 +1,71 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/transform.h"
#include "fastdeploy/vision/common/result.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
/*! @brief Postprocessor object for the StructureV2Table model.
*/
class FASTDEPLOY_DECL StructureV2TablePostprocessor {
public:
StructureV2TablePostprocessor();
/** \brief Create a postprocessor instance for the table structure model
*
* \param[in] dict_path The path of the table structure dictionary
*/
explicit StructureV2TablePostprocessor(const std::string& dict_path);
/** \brief Process the inference results of runtime and decode table cell boxes and structure tokens
*
* \param[in] tensors The inference results from runtime
* \param[in] bbox_batch_list The output table cell boxes of each image
* \param[in] structure_batch_list The output table structure tokens of each image
* \param[in] batch_det_img_info The image info {width, height, resize_w, resize_h} of each image
* \return true if the postprocess succeeds, otherwise false
*/
bool Run(const std::vector<FDTensor>& tensors,
std::vector<std::vector<std::array<int, 8>>>* bbox_batch_list,
std::vector<std::vector<std::string>>* structure_batch_list,
const std::vector<std::array<int, 4>>& batch_det_img_info);
private:
PostProcessor util_post_processor_;
bool SingleBatchPostprocessor(const float* structure_probs,
const float* bbox_preds,
size_t slice_dim,
size_t prob_dim,
size_t box_dim,
int img_width,
int img_height,
std::vector<std::array<int, 8>>* boxes_result,
std::vector<std::string>* structure_list_result);
bool merge_no_span_structure{true};
std::vector<std::string> dict_character;
std::vector<std::string> td_tokens{"<td>", "<td", "<td></td>"};
std::map<std::string, int> dict;
int ignore_beg_token_idx = -1;
int ignore_end_token_idx = -1;
int dict_end_idx = -1;
bool initialized_ = false;
};
} // namespace ocr
} // namespace vision
} // namespace fastdeploy


@@ -0,0 +1,105 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h"
#include "fastdeploy/function/concat.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
StructureV2TablePreprocessor::StructureV2TablePreprocessor() {
resize_op_ = std::make_shared<Resize>(-1, -1);
std::vector<float> value = {0, 0, 0};
pad_op_ = std::make_shared<Pad>(0, 0, 0, 0, value);
std::vector<float> mean = {0.485f, 0.456f, 0.406f};
std::vector<float> std = {0.229f, 0.224f, 0.225f};
normalize_op_ = std::make_shared<Normalize>(mean, std, true);
hwc2chw_op_ = std::make_shared<HWC2CHW>();
}
void StructureV2TablePreprocessor::StructureV2TableResizeImage(FDMat* mat,
int batch_idx) {
float img_h = float(rec_image_shape_[1]);
float img_w = float(rec_image_shape_[2]);
float width = float(mat->Width());
float height = float(mat->Height());
float ratio = max_len / (std::max(height, width) * 1.0);
int resize_h = int(height * ratio);
int resize_w = int(width * ratio);
resize_op_->SetWidthAndHeight(resize_w, resize_h);
(*resize_op_)(mat);
(*normalize_op_)(mat);
pad_op_->SetPaddingSize(0, int(max_len - resize_h), 0,
int(max_len - resize_w));
(*pad_op_)(mat);
(*hwc2chw_op_)(mat);
batch_det_img_info_[batch_idx] = {int(width), int(height), resize_w,
resize_h};
}
bool StructureV2TablePreprocessor::Run(std::vector<FDMat>* images,
std::vector<FDTensor>* outputs,
size_t start_index, size_t end_index,
const std::vector<int>& indices) {
if (images->size() == 0 || end_index <= start_index ||
end_index > images->size()) {
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
"end_index <= images->size()"
<< std::endl;
return false;
}
std::vector<FDMat> mats(end_index - start_index);
for (size_t i = start_index; i < end_index; ++i) {
size_t real_index = i;
if (indices.size() != 0) {
real_index = indices[i];
}
mats[i - start_index] = images->at(real_index);
}
return Run(&mats, outputs);
}
bool StructureV2TablePreprocessor::Apply(FDMatBatch* image_batch,
std::vector<FDTensor>* outputs) {
batch_det_img_info_.clear();
batch_det_img_info_.resize(image_batch->mats->size());
for (size_t i = 0; i < image_batch->mats->size(); ++i) {
FDMat* mat = &(image_batch->mats->at(i));
StructureV2TableResizeImage(mat, i);
}
// Only have 1 output Tensor.
outputs->resize(1);
// Get the NCHW tensor
FDTensor* tensor = image_batch->Tensor();
(*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(),
tensor->Data(), tensor->device,
tensor->device_id);
return true;
}
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
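
StructureV2TableResizeImage above keeps the aspect ratio, scales the longer side to max_len (488), then pads the right and bottom to a 488 x 488 square. A worked sketch of just that arithmetic for an assumed 976 x 488 input:

#include <algorithm>
#include <cstdio>

int main() {
  const float max_len = 488.0f;
  // Hypothetical input image: 976 wide, 488 high.
  float width = 976.0f, height = 488.0f;
  float ratio = max_len / std::max(height, width);  // 488 / 976 = 0.5
  int resize_w = int(width * ratio);                // 488
  int resize_h = int(height * ratio);               // 244
  int pad_right = int(max_len) - resize_w;          // 0
  int pad_bottom = int(max_len) - resize_h;         // 244
  std::printf("resize %dx%d, pad right %d, pad bottom %d -> 488x488\n",
              resize_w, resize_h, pad_right, pad_bottom);
  return 0;
}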


@@ -0,0 +1,74 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/common/processors/transform.h"
#include "fastdeploy/vision/common/processors/manager.h"
#include "fastdeploy/vision/common/result.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
/*! @brief Preprocessor object for table model.
*/
class FASTDEPLOY_DECL StructureV2TablePreprocessor : public ProcessorManager {
public:
StructureV2TablePreprocessor();
using ProcessorManager::Run;
/** \brief Process the input image and prepare input tensors for runtime
*
* \param[in] images The input data list, all the elements are FDMat
* \param[in] outputs The output tensors which will be fed into runtime
* \param[in] start_index The start index of the images to be processed
* \param[in] end_index The end index (exclusive) of the images to be processed
* \param[in] indices Optional indices that reorder the images; pass an empty vector to keep the original order
* \return true if the preprocess succeeds, otherwise false
*/
bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
size_t start_index, size_t end_index,
const std::vector<int>& indices);
/** \brief Implement the virtual function of ProcessorManager, Apply() is the
* body of Run(). Apply() contains the main logic of preprocessing, Run() is
* called by users to execute preprocessing
*
* \param[in] image_batch The input image batch
* \param[in] outputs The output tensors which will be fed into runtime
* \return true if the preprocess succeeds, otherwise false
*/
virtual bool Apply(FDMatBatch* image_batch, std::vector<FDTensor>* outputs);
/// Get the image info of the last batch, return a list of array
/// {image width, image height, resize width, resize height}
const std::vector<std::array<int, 4>>* GetBatchImgInfo() {
return &batch_det_img_info_;
}
private:
void StructureV2TableResizeImage(FDMat* mat, int batch_idx);
// for recording the switch of hwc2chw
bool disable_permute_ = false;
// for recording the switch of normalize
bool disable_normalize_ = false;
int max_len = 488;
std::vector<int> rec_image_shape_ = {3, max_len, max_len};
bool static_shape_infer_ = false;
std::shared_ptr<Resize> resize_op_;
std::shared_ptr<Pad> pad_op_;
std::shared_ptr<Normalize> normalize_op_;
std::shared_ptr<HWC2CHW> hwc2chw_op_;
std::vector<std::array<int, 4>> batch_det_img_info_;
};
} // namespace ocr
} // namespace vision
} // namespace fastdeploy


@@ -0,0 +1,89 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace vision {
namespace ocr {
std::vector<int> Xyxyxyxy2Xyxy(std::array<int, 8> &box) {
int x_collect[4] = {box[0], box[2], box[4], box[6]};
int y_collect[4] = {box[1], box[3], box[5], box[7]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
std::vector<int> box1(4, 0);
box1[0] = left;
box1[1] = top;
box1[2] = right;
box1[3] = bottom;
return box1;
}
float Dis(std::vector<int> &box1, std::vector<int> &box2) {
float x1_1 = float(box1[0]);
float y1_1 = float(box1[1]);
float x2_1 = float(box1[2]);
float y2_1 = float(box1[3]);
float x1_2 = float(box2[0]);
float y1_2 = float(box2[1]);
float x2_2 = float(box2[2]);
float y2_2 = float(box2[3]);
float dis = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1) +
std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1);
float dis_2 = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1);
float dis_3 = std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1);
return dis + std::min(dis_2, dis_3);
}
float Iou(std::vector<int> &box1, std::vector<int> &box2) {
int area1 = std::max(0, box1[2] - box1[0]) * std::max(0, box1[3] - box1[1]);
int area2 = std::max(0, box2[2] - box2[0]) * std::max(0, box2[3] - box2[1]);
// computing the sum_area
int sum_area = area1 + area2;
// find the each point of intersect rectangle
int x1 = std::max(box1[0], box2[0]);
int y1 = std::max(box1[1], box2[1]);
int x2 = std::min(box1[2], box2[2]);
int y2 = std::min(box1[3], box2[3]);
// judge if there is an intersect
if (y1 >= y2 || x1 >= x2) {
return 0.0;
} else {
int intersect = (x2 - x1) * (y2 - y1);
return intersect / (sum_area - intersect + 0.00000001);
}
}
bool ComparisonDis(const std::vector<float> &dis1,
const std::vector<float> &dis2) {
if (dis1[1] < dis2[1]) {
return true;
} else if (dis1[1] == dis2[1]) {
return dis1[0] < dis2[0];
} else {
return false;
}
}
} // namespace ocr
} // namespace vision
} // namespace fastdeploy
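
A worked example (with hypothetical boxes) of the helpers above, showing the values the table pipeline feeds into its matching step:

#include <cstdio>
#include <vector>
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"

int main() {
  namespace ocr = fastdeploy::vision::ocr;
  std::vector<int> a = {0, 0, 100, 40};    // xyxy box, area 4000
  std::vector<int> b = {50, 20, 150, 60};  // xyxy box, area 4000
  // Intersection: x in [50, 100], y in [20, 40] -> 50 * 20 = 1000
  // IoU = 1000 / (4000 + 4000 - 1000) = 1000 / 7000 ~= 0.1429
  std::printf("iou = %f\n", ocr::Iou(a, b));
  // Dis = |dx1| + |dy1| + |dx2| + |dy2| + min(|dx1| + |dy1|, |dx2| + |dy2|)
  //     = 140 + min(70, 70) = 210
  std::printf("dis = %f\n", ocr::Dis(a, b));
  return 0;
}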


@@ -34,6 +34,15 @@ FASTDEPLOY_DECL void SortBoxes(std::vector<std::array<int, 8>>* boxes);
FASTDEPLOY_DECL std::vector<int> ArgSort(const std::vector<float> &array);
FASTDEPLOY_DECL std::vector<int> Xyxyxyxy2Xyxy(std::array<int, 8> &box);
FASTDEPLOY_DECL float Dis(std::vector<int> &box1, std::vector<int> &box2);
FASTDEPLOY_DECL float Iou(std::vector<int> &box1, std::vector<int> &box2);
FASTDEPLOY_DECL bool ComparisonDis(const std::vector<float> &dis1,
const std::vector<float> &dis2);
} // namespace ocr
} // namespace vision
} // namespace fastdeploy


@@ -648,6 +648,107 @@ class Recognizer(FastDeployModel):
self._model.preprocessor.rec_image_shape = value
class StructureV2TablePreprocessor:
def __init__(self):
"""Create a preprocessor for StructureV2TableModel
"""
self._preprocessor = C.vision.ocr.StructureV2TablePreprocessor()
def run(self, input_ims):
"""Preprocess input images for StructureV2TableModel
:param input_ims: (list of numpy.ndarray)The input images, each element is a 3-D array with layout HWC, BGR format
:return: list of FDTensor
"""
return self._preprocessor.run(input_ims)
class StructureV2TablePostprocessor:
def __init__(self):
"""Create a postprocessor for StructureV2TableModel
"""
self._postprocessor = C.vision.ocr.StructureV2TablePostprocessor()
def run(self, runtime_results):
"""Postprocess the runtime results for StructureV2TableModel
:param runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime
:return: list of Result (if the runtime_results come from batched samples, the length of this list equals the batch size)
"""
return self._postprocessor.run(runtime_results)
class StructureV2Table(FastDeployModel):
def __init__(self,
model_file="",
params_file="",
table_char_dict_path="",
runtime_option=None,
model_format=ModelFormat.PADDLE):
"""Load OCR StructureV2Table model provided by PaddleOCR.
:param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel.
:param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
:param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU.
:param model_format: (fastdeploy.ModelForamt)Model format of the loaded model.
"""
super(StructureV2Table, self).__init__(runtime_option)
if (len(model_file) == 0):
self._model = C.vision.ocr.StructureV2Table()
self._runnable = False
else:
self._model = C.vision.ocr.StructureV2Table(
model_file, params_file, table_char_dict_path,
self._runtime_option, model_format)
assert self.initialized, "Classifier initialize failed."
self._runnable = True
def clone(self):
"""Clone OCR StructureV2Table model object
:return: a new OCR StructureV2Table model object
"""
class StructureV2TableClone(StructureV2Table):
def __init__(self, model):
self._model = model
clone_model = StructureV2TableClone(self._model.clone())
return clone_model
def predict(self, input_image):
"""Predict an input image
:param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
:return: bbox, structure
"""
if self._runnable:
return self._model.predict(input_image)
return False
def batch_predict(self, images):
"""Predict a batch of input image
:param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
:return: list of bbox list, list of structure
"""
if self._runnable:
return self._model.batch_predict(images)
return False
@property
def preprocessor(self):
return self._model.preprocessor
@preprocessor.setter
def preprocessor(self, value):
self._model.preprocessor = value
@property
def postprocessor(self):
return self._model.postprocessor
@postprocessor.setter
def postprocessor(self, value):
self._model.postprocessor = value
class PPOCRv3(FastDeployModel):
def __init__(self, det_model=None, cls_model=None, rec_model=None):
"""Consruct a pipeline with text detector, direction classifier and text recognizer models
@@ -800,3 +901,58 @@ class PPOCRSystemv2(PPOCRv2):
def predict(self, input_image):
return super(PPOCRSystemv2, self).predict(input_image)
class PPStructureV2Table(FastDeployModel):
def __init__(self, det_model=None, rec_model=None, table_model=None):
"""Consruct a pipeline with text detector, text recognizer and table recognizer models
:param det_model: (FastDeployModel) The detection model object created by fastdeploy.vision.ocr.DBDetector.
:param rec_model: (FastDeployModel) The recognition model object created by fastdeploy.vision.ocr.Recognizer.
:param table_model: (FastDeployModel) The table recognition model object created by fastdeploy.vision.ocr.StructureV2Table.
"""
assert det_model is not None and rec_model is not None and table_model is not None, "The det_model, rec_model and table_model cannot be None."
self.system_ = C.vision.ocr.PPStructureV2Table(
det_model._model,
rec_model._model,
table_model._model, )
def clone(self):
"""Clone PPStructureV2Table pipeline object
:return: a new PPStructureV2Table pipeline object
"""
class PPStructureV2TableClone(PPStructureV2Table):
def __init__(self, system):
self.system_ = system
clone_model = PPStructureV2TableClone(self.system_.clone())
return clone_model
def predict(self, input_image):
"""Predict an input image
:param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
:return: OCRResult
"""
return self.system_.predict(input_image)
def batch_predict(self, images):
"""Predict a batch of input image
:param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
:return: OCRBatchResult
"""
return self.system_.batch_predict(images)
class PPStructureV2TableSystem(PPStructureV2Table):
def __init__(self, det_model=None, rec_model=None, table_model=None):
logging.warning(
"DEPRECATED: fd.vision.ocr.PPStructureV2TableSystem is deprecated, "
"please use fd.vision.ocr.PPStructureV2Table instead.")
super(PPStructureV2TableSystem, self).__init__(det_model, rec_model,
table_model)
def predict(self, input_image):
return super(PPStructureV2TableSystem, self).predict(input_image)