diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt index f9ceabf9d..41373e1a5 100755 --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -22,6 +22,7 @@ add_executable(benchmark_ppmatting ${PROJECT_SOURCE_DIR}/benchmark_ppmatting.cc) add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc) add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc) add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc) +add_executable(benchmark_structurev2_table ${PROJECT_SOURCE_DIR}/benchmark_structurev2_table.cc) add_executable(benchmark_ppyoloe_r ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r.cc) add_executable(benchmark_ppyoloe_r_sophgo ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r_sophgo.cc) add_executable(benchmark_ppyolo ${PROJECT_SOURCE_DIR}/benchmark_ppyolo.cc) @@ -55,6 +56,7 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_structurev2_table ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags pthread) @@ -85,6 +87,7 @@ else() target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_structurev2_table ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags) diff --git 
a/benchmark/cpp/benchmark_gpu.sh b/benchmark/cpp/benchmark_gpu.sh index 8a1fba5cd..c38f135cd 100755 --- a/benchmark/cpp/benchmark_gpu.sh +++ b/benchmark/cpp/benchmark_gpu.sh @@ -44,6 +44,7 @@ fi ./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH ./benchmark_ppocr_det --model ch_PP-OCRv2_det_infer --image 12.jpg --config_path $CONFIG_PATH ./benchmark_ppocr_rec --model ch_PP-OCRv2_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH +./benchmark_structurev2_table --model en_ppstructure_mobile_v2.0_SLANet_infer --image table.jpg --table_char_dict_path table_structure_dict.txt --config_path $CONFIG_PATH # PaddleDetection ./benchmark_ppyolov5 --model yolov5_s_300e_coco --image 000000014439.jpg --config_path $CONFIG_PATH diff --git a/benchmark/cpp/benchmark_structurev2_table.cc b/benchmark/cpp/benchmark_structurev2_table.cc new file mode 100755 index 000000000..6d0ca35db --- /dev/null +++ b/benchmark/cpp/benchmark_structurev2_table.cc @@ -0,0 +1,161 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "flags.h" +#include "macros.h" +#include "option.h" + +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + +DEFINE_string(table_char_dict_path, "", + "Path of table character dict of PPOCR."); +DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,2304", + "Set min/opt/max shape for trt/paddle_trt backend." + "eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304"); + +int main(int argc, char *argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + std::unordered_map<std::string, std::string> config_info; + benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path, + &config_info); + std::string model_name, params_name, config_name; + auto model_format = fastdeploy::ModelFormat::PADDLE; + if (!UpdateModelResourceName(&model_name, &params_name, &config_name, + &model_format, config_info, false)) { + return -1; + } + auto model_file = FLAGS_model + sep + model_name; + auto params_file = FLAGS_model + sep + params_name; + if (config_info["backend"] == "paddle_trt") { + option.paddle_infer_option.collect_trt_shape = true; + } + if (config_info["backend"] == "paddle_trt" || + config_info["backend"] == "trt") { + std::vector<std::vector<int32_t>> trt_shapes = + benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape); + option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1], + trt_shapes[2]); + } + + auto model_ppocr_table = vision::ocr::StructureV2Table( + model_file, params_file, FLAGS_table_char_dict_path, option, + model_format); + fastdeploy::vision::OCRResult result; + + if (config_info["precision_compare"] == "true") { + std::string expect_structure_html = + ""; + std::vector<int> expect_box_coord{ + 41, 4, 97, 18, 161, 4, 173, 18, 216, 4, 225, 17, 272, 4, + 283, 17, 321, 4, 348, 18, 33, 20, 106, 38, 150, 22, 180, 38, + 202, 22, 235, 38, 262, 21, 293, 38, 326, 23, 343, 37, 27, 38, 
+ 109, 56, 150, 39, 179, 56, 204, 39, 236, 56, 263, 39, 292, 55, + 329, 40, 343, 54, 22, 57, 118, 74, 152, 58, 176, 74, 204, 58, + 236, 75, 262, 58, 291, 74, 326, 58, 344, 74, 27, 75, 119, 92, + 150, 75, 177, 92, 204, 75, 235, 92, 260, 75, 292, 92, 326, 75, + 346, 92, 44, 92, 102, 110, 150, 92, 177, 110, 205, 92, 236, 110, + 262, 92, 290, 110, 329, 93, 339, 110, 41, 109, 102, 128, 151, 110, + 175, 128, 205, 110, 236, 128, 262, 110, 291, 127, 329, 110, 338, 127, + 42, 128, 102, 146, 149, 128, 177, 146, 205, 128, 237, 146, 262, 128, + 291, 146, 329, 128, 339, 145, 31, 145, 110, 163, 150, 145, 178, 163, + 206, 145, 237, 164, 262, 145, 292, 163, 324, 145, 342, 162, 40, 162, + 108, 180, 154, 162, 175, 180, 209, 162, 231, 180, 266, 162, 286, 180, + 325, 162, 341, 179, 38, 180, 105, 197, 152, 180, 177, 197, 207, 180, + 236, 197, 262, 180, 291, 197, 329, 181, 339, 196, 42, 196, 102, 214, + 151, 197, 179, 214, 205, 197, 236, 214, 263, 197, 291, 214, 320, 197, + 349, 214, 46, 215, 100, 233, 149, 216, 179, 233, 204, 216, 238, 233, + 262, 216, 291, 233, 321, 216, 345, 232, 42, 233, 104, 251, 147, 234, + 179, 251, 203, 233, 237, 251, 260, 233, 294, 251, 326, 234, 341, 250, + 19, 251, 120, 269, 148, 253, 180, 270, 202, 252, 240, 270, 259, 252, + 294, 270, 324, 252, 347, 268, 16, 270, 123, 286, 146, 270, 182, 287, + 200, 270, 238, 287, 256, 270, 294, 286, 319, 270, 353, 286}; + + // Run once at least + if (!model_ppocr_table.Predict(im, &result)) { + std::cerr << "Failed to predict." << std::endl; + return -1; + } + + // 1. Test result diff + std::cout << "=============== Test Table Result diff =================\n"; + // Calculate diff between two results. 
+ std::string result_table_structure; + for (auto &structure : result.table_structure) { + result_table_structure += structure; + } + if (expect_structure_html == result_table_structure) { + std::cout << "PPOCR Table structure has no diff" << std::endl; + } else { + std::cout << "PPOCR Table structure has diff" << std::endl; + std::cout << "expected: " << expect_structure_html << std::endl; + std::cout << "result: " << result_table_structure << std::endl; + } + + std::vector<int> table_box_coord; + for (auto &box : result.table_boxes) { + // x1 y1 x2 y1 x2 y2 x1 y2 => x1 y1 x2 y2 + table_box_coord.push_back(box[0]); + table_box_coord.push_back(box[1]); + table_box_coord.push_back(box[2]); + table_box_coord.push_back(box[5]); + } + + if (expect_box_coord.size() == table_box_coord.size()) { + std::cout << "table boxes num matched with expected: " + << table_box_coord.size() << std::endl; + int max_diff = 0; + int total_diff = 0; + for (int i = 0; i < table_box_coord.size(); i++) { + int diff = std::abs(table_box_coord[i] - expect_box_coord[i]); + if (diff > max_diff) { + max_diff = diff; + } + total_diff += diff; + } + std::cout << "box coords, max_diff: " << max_diff << ", " + << ", total diff: " << total_diff << ", average diff: " + << total_diff / float(table_box_coord.size()) << std::endl; + } else { + std::cout << "boxes num has diff, expect box num: " + << expect_box_coord.size() / 4 + << ", result box num:" << table_box_coord.size() / 4 + << std::endl; + } + } + + BENCHMARK_MODEL(model_ppocr_table, model_ppocr_table.Predict(im, &result)); +#endif + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/get_models.sh b/benchmark/cpp/get_models.sh index e17e4b07d..534a714ad 100755 --- a/benchmark/cpp/get_models.sh +++ b/benchmark/cpp/get_models.sh @@ -212,6 +212,7 @@ download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP download_common_model_xvf https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar 
ch_ppocr_mobile_v2.0_cls_infer.tar download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar ch_PP-OCRv2_det_infer.tar download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ch_PP-OCRv2_rec_infer.tar +download_common_model_xvf https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar en_ppstructure_mobile_v2.0_SLANet_infer.tar # download images download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/rec_img.jpg rec_img.jpg diff --git a/c_api/fastdeploy_capi/core/config.h b/c_api/fastdeploy_capi/core/config.h index 73ad1145a..73de04c44 100755 --- a/c_api/fastdeploy_capi/core/config.h +++ b/c_api/fastdeploy_capi/core/config.h @@ -18,5 +18,5 @@ #endif #ifndef ENABLE_TEXT -#define ENABLE_TEXT +/* #undef ENABLE_TEXT */ #endif diff --git a/c_api/fastdeploy_capi/core/fd_type.h b/c_api/fastdeploy_capi/core/fd_type.h index 640ace890..7fa7c9cb2 100644 --- a/c_api/fastdeploy_capi/core/fd_type.h +++ b/c_api/fastdeploy_capi/core/fd_type.h @@ -60,6 +60,11 @@ typedef struct FD_C_OneDimArrayCstr { FD_C_Cstr* data; } FD_C_OneDimArrayCstr; // std::vector<std::string> +typedef struct FD_C_TwoDimArrayCstr { + size_t size; + FD_C_OneDimArrayCstr* data; +} FD_C_TwoDimArrayCstr; // std::vector<std::vector<std::string>> + typedef struct FD_C_TwoDimArraySize { size_t size; FD_C_OneDimArraySize* data; @@ -134,6 +139,8 @@ DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayFloat); DECLARE_DESTROY_FD_TYPE_FUNCTION(Cstr); // FD_C_OneDimArrayCstr DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayCstr); +// FD_C_TwoDimArrayCstr +DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArrayCstr); // FD_C_TwoDimArraySize DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArraySize); // FD_C_TwoDimArrayInt8 diff --git a/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc b/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc index 0da91e995..ffffa5ee9 100644 --- a/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc +++ b/c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc @@ -318,6 +318,124 @@ 
FD_C_Bool FD_C_DBDetectorWrapperBatchPredict( return successful; } +// StructureV2Table +FD_C_StructureV2TableWrapper* FD_C_CreateStructureV2TableWrapper( + const char* model_file, const char* params_file, + const char* table_char_dict_path, + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const FD_C_ModelFormat model_format) { + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, + fd_c_runtime_option_wrapper); + FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper = + new FD_C_StructureV2TableWrapper(); + fd_c_structurev2table_wrapper->table_model = + std::unique_ptr<fastdeploy::vision::ocr::StructureV2Table>( + new fastdeploy::vision::ocr::StructureV2Table( + std::string(model_file), std::string(params_file), + std::string(table_char_dict_path), *runtime_option, + static_cast<fastdeploy::ModelFormat>(model_format))); + return fd_c_structurev2table_wrapper; +} + +OCR_DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION( + StructureV2Table, fd_c_structurev2table_wrapper) + +FD_C_Bool FD_C_StructureV2TableWrapperPredict( + FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_Mat img, + FD_C_TwoDimArrayInt32* boxes_result, + FD_C_OneDimArrayCstr* structure_result) { + cv::Mat* im = reinterpret_cast<cv::Mat*>(img); + std::vector<std::array<int, 8>> boxes_result_out; + std::vector<std::string> structures_result_out; + auto& model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper, + fd_c_structurev2table_wrapper); + bool successful = + model->Predict(*im, &boxes_result_out, &structures_result_out); + if (successful) { + // copy boxes + const int boxes_coordinate_dim = 8; + boxes_result->size = boxes_result_out.size(); + boxes_result->data = new FD_C_OneDimArrayInt32[boxes_result->size]; + for (size_t i = 0; i < boxes_result_out.size(); i++) { + boxes_result->data[i].size = boxes_coordinate_dim; + boxes_result->data[i].data = new int[boxes_coordinate_dim]; + for (size_t j = 0; j < boxes_coordinate_dim; j++) { + boxes_result->data[i].data[j] = boxes_result_out[i][j]; + } + } + // copy structures + structure_result->size = 
structures_result_out.size(); + structure_result->data = new FD_C_Cstr[structure_result->size]; + for (int i = 0; i < structures_result_out.size(); i++) { + structure_result->data[i].size = structures_result_out[i].length(); + structure_result->data[i].data = + new char[structures_result_out[i].length() + 1]; + strncpy(structure_result->data[i].data, structures_result_out[i].c_str(), + structures_result_out[i].length() + 1); + } + } + return successful; +} + +OCR_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(StructureV2Table, + fd_c_structurev2table_wrapper) + +FD_C_Bool FD_C_StructureV2TableWrapperBatchPredict( + FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, + FD_C_OneDimMat imgs, FD_C_ThreeDimArrayInt32* det_results, + FD_C_TwoDimArrayCstr* structure_results) { + std::vector<cv::Mat> imgs_vec; + std::vector<std::vector<std::array<int, 8>>> det_results_out; + std::vector<std::vector<std::string>> structure_results_out; + for (int i = 0; i < imgs.size; i++) { + imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i]))); + } + auto& model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper, + fd_c_structurev2table_wrapper); + bool successful = + model->BatchPredict(imgs_vec, &det_results_out, &structure_results_out); + if (successful) { + // copy results back to FD_C_ThreeDimArrayInt32 + det_results->size = det_results_out.size(); + det_results->data = new FD_C_TwoDimArrayInt32[det_results->size]; + for (int batch_indx = 0; batch_indx < det_results->size; batch_indx++) { + const int boxes_coordinate_dim = 8; + det_results->data[batch_indx].size = det_results_out[batch_indx].size(); + det_results->data[batch_indx].data = + new FD_C_OneDimArrayInt32[det_results->data[batch_indx].size]; + for (size_t i = 0; i < det_results_out[batch_indx].size(); i++) { + det_results->data[batch_indx].data[i].size = boxes_coordinate_dim; + det_results->data[batch_indx].data[i].data = + new int[boxes_coordinate_dim]; + for (size_t j = 0; j < boxes_coordinate_dim; j++) { + det_results->data[batch_indx].data[i].data[j] = + 
det_results_out[batch_indx][i][j]; + } + } + } + // copy structures + structure_results->size = structure_results_out.size(); + structure_results->data = new FD_C_OneDimArrayCstr[structure_results->size]; + for (int batch_indx = 0; batch_indx < structure_results->size; + batch_indx++) { + structure_results->data[batch_indx].size = + structure_results_out[batch_indx].size(); + structure_results->data[batch_indx].data = + new FD_C_Cstr[structure_results->data[batch_indx].size]; + for (int i = 0; i < structure_results_out[batch_indx].size(); i++) { + structure_results->data[batch_indx].data[i].size = + structure_results_out[batch_indx][i].length(); + structure_results->data[batch_indx].data[i].data = + new char[structure_results_out[batch_indx][i].length() + 1]; + strncpy(structure_results->data[batch_indx].data[i].data, + structure_results_out[batch_indx][i].c_str(), + structure_results_out[batch_indx][i].length() + 1); + } + } + } + return successful; +} + // PPOCRv2 FD_C_PPOCRv2Wrapper* FD_C_CreatePPOCRv2Wrapper( @@ -466,6 +584,82 @@ FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict( return successful; } +// PPStructureV2Table + +FD_C_PPStructureV2TableWrapper* FD_C_CreatePPStructureV2TableWrapper( + FD_C_DBDetectorWrapper* fd_c_det_model_wrapper, + FD_C_RecognizerWrapper* fd_c_rec_model_wrapper, + FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper) { + FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper = + new FD_C_PPStructureV2TableWrapper(); + auto& det_model = + CHECK_AND_CONVERT_FD_TYPE(DBDetectorWrapper, fd_c_det_model_wrapper); + auto& rec_model = + CHECK_AND_CONVERT_FD_TYPE(RecognizerWrapper, fd_c_rec_model_wrapper); + auto& table_model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper, + fd_c_structurev2table_wrapper); + fd_c_ppstructurev2table_wrapper->ppstructurev2table_model = + std::unique_ptr<fastdeploy::pipeline::PPStructureV2Table>( + new fastdeploy::pipeline::PPStructureV2Table( + det_model.get(), rec_model.get(), table_model.get())); + return fd_c_ppstructurev2table_wrapper; 
+} + +PIPELINE_DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION( + PPStructureV2Table, fd_c_ppstructurev2table_wrapper) + +FD_C_Bool FD_C_PPStructureV2TableWrapperPredict( + FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, + FD_C_Mat img, FD_C_OCRResult* fd_c_ocr_result) { + cv::Mat* im = reinterpret_cast<cv::Mat*>(img); + auto& model = CHECK_AND_CONVERT_FD_TYPE(PPStructureV2TableWrapper, + fd_c_ppstructurev2table_wrapper); + FD_C_OCRResultWrapper* fd_c_ocr_result_wrapper = + FD_C_CreateOCRResultWrapper(); + auto& ocr_result = + CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, fd_c_ocr_result_wrapper); + + bool successful = model->Predict(im, ocr_result.get()); + if (successful) { + FD_C_OCRResultWrapperToCResult(fd_c_ocr_result_wrapper, fd_c_ocr_result); + } + FD_C_DestroyOCRResultWrapper(fd_c_ocr_result_wrapper); + return successful; +} + +PIPELINE_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION( + PPStructureV2Table, fd_c_ppstructurev2table_wrapper) + +FD_C_Bool FD_C_PPStructureV2TableWrapperBatchPredict( + FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, + FD_C_OneDimMat imgs, FD_C_OneDimOCRResult* results) { + std::vector<cv::Mat> imgs_vec; + std::vector<FD_C_OCRResultWrapper*> results_wrapper_out; + std::vector<fastdeploy::vision::OCRResult> results_out; + for (int i = 0; i < imgs.size; i++) { + imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i]))); + FD_C_OCRResultWrapper* fd_ocr_result_wrapper = + FD_C_CreateOCRResultWrapper(); + results_wrapper_out.push_back(fd_ocr_result_wrapper); + } + auto& model = CHECK_AND_CONVERT_FD_TYPE(PPStructureV2TableWrapper, + fd_c_ppstructurev2table_wrapper); + bool successful = model->BatchPredict(imgs_vec, &results_out); + if (successful) { + // copy results back to FD_C_OneDimOCRResult + results->size = results_out.size(); + results->data = new FD_C_OCRResult[results->size]; + for (int i = 0; i < results_out.size(); i++) { + (*CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, results_wrapper_out[i])) = + std::move(results_out[i]); + 
FD_C_OCRResultWrapperToCResult(results_wrapper_out[i], &results->data[i]); + } + } + for (int i = 0; i < results_out.size(); i++) { + FD_C_DestroyOCRResultWrapper(results_wrapper_out[i]); + } + return successful; +} #ifdef __cplusplus } #endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/ocr/ppocr/model.h b/c_api/fastdeploy_capi/vision/ocr/ppocr/model.h index b1a1fd0a5..0963e1ede 100644 --- a/c_api/fastdeploy_capi/vision/ocr/ppocr/model.h +++ b/c_api/fastdeploy_capi/vision/ocr/ppocr/model.h @@ -225,6 +225,68 @@ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_DBDetectorWrapperBatchPredict( FD_C_ThreeDimArrayInt32* det_results); +// StructureV2Table + +typedef struct FD_C_StructureV2TableWrapper FD_C_StructureV2TableWrapper; + +/** \brief Create a new FD_C_StructureV2TableWrapper object + * + * \param[in] model_file Path of model file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel. + * \param[in] params_file Path of parameter file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`. + * \param[in] model_format Model format of the loaded model, default is Paddle format. 
+ * + * \return Return a pointer to FD_C_StructureV2TableWrapper object + */ + +FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_StructureV2TableWrapper* +FD_C_CreateStructureV2TableWrapper( + const char* model_file, const char* params_file, const char* table_char_dict_path, + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + const FD_C_ModelFormat model_format); + +/** \brief Destroy a FD_C_StructureV2TableWrapper object + * + * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_DBDetectorWrapper object + */ + +OCR_DECLARE_DESTROY_WRAPPER_FUNCTION(StructureV2Table, fd_c_structurev2table_wrapper); + +/** \brief Predict the input image and get OCR table model result. + * + * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] boxes_result The output of OCR table model result will be writen to this structure. + * \return true if the prediction is successed, otherwise false. + */ + +FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_StructureV2TableWrapperPredict( + __fd_keep FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_Mat img, + FD_C_TwoDimArrayInt32* boxes_result, FD_C_OneDimArrayCstr* structure_result); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_dbdetector_wrapper pointer to FD_C_StructureV2TableWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +OCR_DECLARE_INITIALIZED_FUNCTION(StructureV2Table, fd_c_structurev2table_wrapper); + +/** \brief BatchPredict the input image and get OCR table model result. + * + * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object + * \param[in] imgs The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. 
+ * \param[in] det_results The output of OCR table model result will be writen to this structure. + * + * \return true if the prediction is successed, otherwise false. + */ + +FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_StructureV2TableWrapperBatchPredict( + __fd_keep FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_OneDimMat imgs, + FD_C_ThreeDimArrayInt32* det_results, FD_C_TwoDimArrayCstr* structure_results); + + // PPOCRv2 @@ -343,6 +405,63 @@ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict( FD_C_OneDimOCRResult* batch_result); +// PPStructureV2Table + +typedef struct FD_C_PPStructureV2TableWrapper FD_C_PPStructureV2TableWrapper; + +/** \brief Set up the detection model path, classification model path and table recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv3_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv3_rec_infer + * \param[in] table_model Path of table model, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer + */ + +FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PPStructureV2TableWrapper* +FD_C_CreatePPStructureV2TableWrapper( + FD_C_DBDetectorWrapper* det_model, + FD_C_RecognizerWrapper* rec_model, + FD_C_StructureV2TableWrapper* table_model); + +/** \brief Destroy a FD_C_PPTableWrapper object + * + * \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object + */ + +OCR_DECLARE_DESTROY_WRAPPER_FUNCTION(PPStructureV2Table, fd_c_ppstructurev2table_wrapper); + +/** \brief Predict the input image and get OCR result. + * + * \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] result The output OCR result will be writen to this structure. + * \return true if the prediction successed, otherwise false. 
+ */ + +FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPStructureV2TableWrapperPredict( + __fd_keep FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, FD_C_Mat img, + FD_C_OCRResult* result); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +OCR_DECLARE_INITIALIZED_FUNCTION(PPStructureV2Table, fd_c_ppstructurev2table_wrapper); + +/** \brief BatchPredict the input image and get OCR result. + * + * \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object + * \param[in] imgs The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] batch_result The output list of OCR result will be writen to this structure. + * \return true if the prediction successed, otherwise false. + */ + +FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPStructureV2TableWrapperBatchPredict( + __fd_keep FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, FD_C_OneDimMat imgs, + FD_C_OneDimOCRResult* batch_result); + #ifdef __cplusplus } // extern "C" #endif diff --git a/c_api/fastdeploy_capi/vision/result.h b/c_api/fastdeploy_capi/vision/result.h index 09ebaa875..a8f780abd 100644 --- a/c_api/fastdeploy_capi/vision/result.h +++ b/c_api/fastdeploy_capi/vision/result.h @@ -70,6 +70,9 @@ typedef struct FD_C_OCRResult { FD_C_OneDimArrayFloat rec_scores; FD_C_OneDimArrayFloat cls_scores; FD_C_OneDimArrayInt32 cls_labels; + FD_C_TwoDimArrayInt32 table_boxes; + FD_C_OneDimArrayCstr table_structure; + FD_C_Cstr table_html; FD_C_ResultType type; } FD_C_OCRResult; diff --git a/c_api/fastdeploy_capi/vision/types_internal.cc b/c_api/fastdeploy_capi/vision/types_internal.cc index 27994ccbc..ba2616eac 100644 --- a/c_api/fastdeploy_capi/vision/types_internal.cc +++ b/c_api/fastdeploy_capi/vision/types_internal.cc 
@@ -166,6 +166,10 @@ DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( Classifier, fd_classifier_wrapper, classifier_model); +// Table +DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + StructureV2Table, fd_structurev2_table_wrapper, table_model); + // PPOCRv2 DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( PPOCRv2, fd_ppocrv2_wrapper, ppocrv2_model); @@ -174,6 +178,11 @@ DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( PPOCRv3, fd_ppocrv3_wrapper, ppocrv3_model); +// PPStructureV2Table +DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PPStructureV2Table, fd_ppstructurev2_table_wrapper, + ppstructurev2table_model); + // Segmentation models // PaddleSegModel diff --git a/c_api/fastdeploy_capi/vision/types_internal.h b/c_api/fastdeploy_capi/vision/types_internal.h index f4bc1696e..ea83562cf 100644 --- a/c_api/fastdeploy_capi/vision/types_internal.h +++ b/c_api/fastdeploy_capi/vision/types_internal.h @@ -29,8 +29,10 @@ #include "fastdeploy/vision/ocr/ppocr/classifier.h" #include "fastdeploy/vision/ocr/ppocr/dbdetector.h" #include "fastdeploy/vision/ocr/ppocr/recognizer.h" +#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h" +#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h" #include "fastdeploy/vision/segmentation/ppseg/model.h" #define DEFINE_RESULT_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \ @@ -176,12 +178,18 @@ DEFINE_OCR_MODEL_WRAPPER_STRUCT(DBDetector, dbdetector_model); // Classifier DEFINE_OCR_MODEL_WRAPPER_STRUCT(Classifier, classifier_model); +// StructureV2Table +DEFINE_OCR_MODEL_WRAPPER_STRUCT(StructureV2Table, table_model); + // PPOCRv2 DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model); // PPOCRv3 
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model); +// PPStructureV2Table +DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPStructureV2Table, ppstructurev2table_model); + // Segmentation models // PaddleSegModel @@ -383,12 +391,18 @@ DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(DBDetector, fd_dbdetector_wrappe // Classifier DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(Classifier, fd_classifier_wrapper); +// Table +DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(StructureV2Table, fd_structurev2_table_wrapper); + // PPOCRv2 DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper); // PPOCRv3 DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper); +// PPStructureV2Table +DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPStructureV2Table, fd_ppstructurev2_table_wrapper); + // Segmentation models // PaddleSegModel diff --git a/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/CMakeLists.txt b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/CMakeLists.txt index fe4e03f26..ac0101c93 100644 --- a/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/CMakeLists.txt +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/CMakeLists.txt @@ -14,6 +14,11 @@ add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) # 添加FastDeploy库依赖 target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) +# PPStructure-V2-Table +add_executable(infer_ppstructurev2_table ${PROJECT_SOURCE_DIR}/infer_ppstructurev2_table.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_ppstructurev2_table ${FASTDEPLOY_LIBS}) + # Only Det add_executable(infer_det ${PROJECT_SOURCE_DIR}/infer_det.cc) # 添加FastDeploy库依赖 @@ -28,3 +33,8 @@ target_link_libraries(infer_cls ${FASTDEPLOY_LIBS}) add_executable(infer_rec ${PROJECT_SOURCE_DIR}/infer_rec.cc) # 添加FastDeploy库依赖 target_link_libraries(infer_rec ${FASTDEPLOY_LIBS}) + +# Only Table +add_executable(infer_structurev2_table ${PROJECT_SOURCE_DIR}/infer_structurev2_table.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_structurev2_table 
${FASTDEPLOY_LIBS}) diff --git a/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/README.md b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/README.md index 4481f49be..17332de19 100644 --- a/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/README.md +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/README.md @@ -43,10 +43,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar # 下载PP-OCRv3文字识别模型 wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar tar -xvf ch_PP-OCRv3_rec_infer.tar +# 下载PPStructureV2表格识别模型 +wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar +tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar # 下载预测图片与字典文件 wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt # 运行部署示例 # 在CPU上使用Paddle Inference推理 @@ -77,6 +82,9 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_ # 在CPU上,单独使用文字识别模型部署 ./infer_rec ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0 + +# 在CPU上,单独使用表格识别模型部署 +./infer_structurev2_table ./ch_ppstructure_mobile_v2.0_SLANet_infer ./table_structure_dict_ch.txt ./table.jpg 0 ``` 运行完成可视化结果如下图所示 diff --git a/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_ppstructurev2_table.cc b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_ppstructurev2_table.cc new file mode 100755 index 000000000..3ae1156fe --- /dev/null +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_ppstructurev2_table.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void InitAndInfer(const std::string &det_model_dir, + const std::string &rec_model_dir, + const std::string &table_model_dir, + const std::string &rec_label_file, + const std::string &table_char_dict_path, + const std::string &image_file, + const fastdeploy::RuntimeOption &option) { + auto det_model_file = det_model_dir + sep + "inference.pdmodel"; + auto det_params_file = det_model_dir + sep + "inference.pdiparams"; + + auto rec_model_file = rec_model_dir + sep + "inference.pdmodel"; + auto rec_params_file = rec_model_dir + sep + "inference.pdiparams"; + + auto table_model_file = table_model_dir + sep + "inference.pdmodel"; + auto table_params_file = table_model_dir + sep + "inference.pdiparams"; + + auto det_option = option; + auto rec_option = option; + auto table_option = option; + + // The rec model can inference a batch of images now. + // User could initialize the inference batch size and set them after create + // PP-OCR model. + int rec_batch_size = 1; + + // If use TRT backend, the dynamic shape will be set as follow. + // We recommend that users set the length and height of the detection model to + // a multiple of 32. + // We also recommend that users set the Trt input shape as follow. 
+ det_option.SetTrtInputShape("x", {1, 3, 64, 64}, {1, 3, 640, 640}, + {1, 3, 960, 960}); + rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320}, + {rec_batch_size, 3, 48, 2304}); + table_option.SetTrtInputShape("x", {1, 3, 488, 488}, {1, 3, 488, 488}, + {1, 3, 488, 488}); + + // Users could save TRT cache file to disk as follow. + det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt"); + rec_option.SetTrtCacheFile(rec_model_dir + sep + "rec_trt_cache.trt"); + table_option.SetTrtCacheFile(table_model_dir + sep + "table_trt_cache.trt"); + + auto det_model = fastdeploy::vision::ocr::DBDetector( + det_model_file, det_params_file, det_option); + auto rec_model = fastdeploy::vision::ocr::Recognizer( + rec_model_file, rec_params_file, rec_label_file, rec_option); + auto table_model = fastdeploy::vision::ocr::StructureV2Table( + table_model_file, table_params_file, table_char_dict_path, table_option); + + assert(det_model.Initialized()); + assert(rec_model.Initialized()); + assert(table_model.Initialized()); + + // Parameters settings for pre and post processing of Det/Cls/Rec Models. + // All parameters are set to default values. + det_model.GetPreprocessor().SetMaxSideLen(960); + det_model.GetPostprocessor().SetDetDBThresh(0.3); + det_model.GetPostprocessor().SetDetDBBoxThresh(0.6); + det_model.GetPostprocessor().SetDetDBUnclipRatio(1.5); + det_model.GetPostprocessor().SetDetDBScoreMode("slow"); + det_model.GetPostprocessor().SetUseDilation(0); + + rec_model.GetPreprocessor().SetStaticShapeInfer(true); + rec_model.GetPreprocessor().SetRecImageShape({3, 48, 320}); + + // The classification model is optional, so the PP-OCR can also be connected + // in series as follows + auto ppstructurev2_table = fastdeploy::pipeline::PPStructureV2Table( + &det_model, &rec_model, &table_model); + + // Set inference batch size for cls model and rec model, the value could be -1 + // and 1 to positive infinity. 
+ // When inference batch size is set to -1, it means that the inference batch + // size of the rec models will be the same as the number of boxes detected + // by the det model. + ppstructurev2_table.SetRecBatchSize(rec_batch_size); + + if (!ppstructurev2_table.Initialized()) { + std::cerr << "Failed to initialize PP-OCR-Table." << std::endl; + return; + } + + auto im = cv::imread(image_file); + auto im_bak = im.clone(); + + fastdeploy::vision::OCRResult result; + if (!ppstructurev2_table.Predict(&im, &result)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << result.Str() << std::endl; + + auto vis_im = fastdeploy::vision::VisOcr(im_bak, result); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char *argv[]) { + if (argc < 8) { + std::cout << "Usage: infer_ppstructurev2_table path/to/det_model " + "path/to/rec_model " + "path/to/table_model path/to/rec_label_file " + "path/to/table_char_dict_path path/to/image " + "run_option, " + "e.g ./infer_ppstructurev2_table ./ch_PP-OCRv3_det_infer " + "./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer " + "./ppocr_keys_v1.txt ./12.jpg 0" + << std::endl; + std::cout << "The data type of run_option is int, e.g. 
0: run with paddle " + "inference on cpu;" + << std::endl; + return -1; + } + + fastdeploy::RuntimeOption option; + int flag = std::atoi(argv[7]); + std::cout << "flag: " << flag << std::endl; + + if (flag == 0) { + option.UseCpu(); + option.UsePaddleBackend(); // Paddle Inference + } else if (flag == 1) { + option.UseCpu(); + option.UseOpenVINOBackend(); // OpenVINO + } else if (flag == 2) { + option.UseCpu(); + option.UseOrtBackend(); // ONNX Runtime + } else if (flag == 3) { + option.UseCpu(); + option.UseLiteBackend(); // Paddle Lite + } else if (flag == 4) { + option.UseGpu(); + option.UsePaddleBackend(); // Paddle Inference + } else if (flag == 5) { + option.UseGpu(); + option.UsePaddleInferBackend(); + option.paddle_infer_option.collect_trt_shape = true; + option.paddle_infer_option.enable_trt = true; // Paddle-TensorRT + } else if (flag == 6) { + option.UseGpu(); + option.UseOrtBackend(); // ONNX Runtime + } else if (flag == 7) { + option.UseGpu(); + option.UseTrtBackend(); // TensorRT + } + + std::string det_model_dir = argv[1]; + std::string rec_model_dir = argv[2]; + std::string table_model_dir = argv[3]; + std::string rec_label_file = argv[4]; + std::string table_char_dict_path = argv[5]; + std::string test_image = argv[6]; + InitAndInfer(det_model_dir, rec_model_dir, table_model_dir, rec_label_file, + table_char_dict_path, test_image, option); + return 0; +} diff --git a/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_structurev2_table.cc b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_structurev2_table.cc new file mode 100755 index 000000000..15cf4fc4d --- /dev/null +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_structurev2_table.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void InitAndInfer(const std::string &table_model_dir, + const std::string &image_file, + const std::string &table_char_dict_path, + const fastdeploy::RuntimeOption &option) { + auto table_model_file = table_model_dir + sep + "inference.pdmodel"; + auto table_params_file = table_model_dir + sep + "inference.pdiparams"; + auto table_option = option; + + auto table_model = fastdeploy::vision::ocr::StructureV2Table( + table_model_file, table_params_file, table_char_dict_path, table_option); + assert(table_model.Initialized()); + + auto im = cv::imread(image_file); + auto im_bak = im.clone(); + + fastdeploy::vision::OCRResult result; + if (!table_model.Predict(im, &result)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << result.Str() << std::endl; +} + +int main(int argc, char *argv[]) { + if (argc < 5) { + std::cout << "Usage: infer_demo path/to/table_model path/to/image " + "path/to/table_dict_path" + "run_option, " + "e.g ./infer_structurev2_table ch_ppocr_mobile_v2.0_cls_infer " + "table.jpg table_structure_dict.txt 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu;." 
+ << std::endl; + return -1; + } + + fastdeploy::RuntimeOption option; + int flag = std::atoi(argv[4]); + + if (flag == 0) { + option.UseCpu(); + } else if (flag == 1) { + option.UseGpu(); + } + + std::string table_model_dir = argv[1]; + std::string test_image = argv[2]; + std::string table_char_dict_path = argv[3]; + InitAndInfer(table_model_dir, test_image, table_char_dict_path, option); + return 0; +} diff --git a/examples/vision/ocr/PP-OCR/cpu-gpu/python/README.md b/examples/vision/ocr/PP-OCR/cpu-gpu/python/README.md index d8143e028..60e8dd0c7 100644 --- a/examples/vision/ocr/PP-OCR/cpu-gpu/python/README.md +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/python/README.md @@ -36,10 +36,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar # 下载PP-OCRv3文字识别模型 wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar tar -xvf ch_PP-OCRv3_rec_infer.tar +# 下载PPStructureV2表格识别模型 +wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar +tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar # 下载预测图片与字典文件 wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt +wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt # 运行部署示例 # 在CPU上使用Paddle Inference推理 @@ -71,6 +76,8 @@ python infer_cls.py --cls_model ch_ppocr_mobile_v2.0_cls_infer --image 12.jpg -- # 在CPU上,单独使用文字识别模型部署 python infer_rec.py --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu +# 在CPU上,单独使用文字识别模型部署 +python infer_structurev2_table.py --table_model ./ch_ppstructure_mobile_v2.0_SLANet_infer --table_char_dict_path ./table_structure_dict_ch.txt --image table.jpg --device cpu ``` 运行完成可视化结果如下图所示 diff --git 
a/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py b/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py new file mode 100755 index 000000000..9fd5fae10 --- /dev/null +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py @@ -0,0 +1,175 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument( + "--det_model", required=True, help="Path of Detection model of PPOCR.") + parser.add_argument( + "--rec_model", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--table_model", + required=True, + help="Path of Table recognition model of PPOCR.") + parser.add_argument( + "--rec_label_file", + required=True, + help="Path of Recognization model of PPOCR.") + parser.add_argument( + "--table_char_dict_path", + type=str, + required=True, + help="tabel recognition dict path.") + parser.add_argument( + "--rec_bs", + type=int, + default=6, + help="Recognition model inference batch size") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--device_id", + type=int, + default=0, + 
help="Define which GPU card used to run model.") + parser.add_argument( + "--backend", + type=str, + default="default", + help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu" + ) + + return parser.parse_args() + + +def build_option(args): + det_option = fd.RuntimeOption() + rec_option = fd.RuntimeOption() + table_option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + det_option.use_gpu(args.device_id) + rec_option.use_gpu(args.device_id) + table_option.use_gpu(args.device_id) + + if args.backend.lower() == "trt": + assert args.device.lower( + ) == "gpu", "TensorRT backend require inference on device GPU." + det_option.use_trt_backend() + rec_option.use_trt_backend() + table_option.use_trt_backend() + + # If use TRT backend, the dynamic shape will be set as follow. + # We recommend that users set the length and height of the detection model to a multiple of 32. + # We also recommend that users set the Trt input shape as follow. + det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], + [1, 3, 960, 960]) + + rec_option.set_trt_input_shape("x", [1, 3, 48, 10], + [args.rec_bs, 3, 48, 320], + [args.rec_bs, 3, 48, 2304]) + + table_option.set_trt_input_shape("x", [1, 3, 488, 488]) + + # Users could save TRT cache file to disk as follow. + det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt") + rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt") + table_option.set_trt_cache_file(args.table_model + + "/table_trt_cache.trt") + + elif args.backend.lower() == "ort": + det_option.use_ort_backend() + rec_option.use_ort_backend() + table_option.use_ort_backend() + + elif args.backend.lower() == "paddle": + det_option.use_paddle_infer_backend() + rec_option.use_paddle_infer_backend() + table_option.use_paddle_infer_backend() + + elif args.backend.lower() == "openvino": + assert args.device.lower( + ) == "cpu", "OpenVINO backend require inference on device CPU." 
+ det_option.use_openvino_backend() + rec_option.use_openvino_backend() + table_option.use_openvino_backend() + + return det_option, rec_option, table_option + + +args = parse_arguments() + +det_model_file = os.path.join(args.det_model, "inference.pdmodel") +det_params_file = os.path.join(args.det_model, "inference.pdiparams") + +rec_model_file = os.path.join(args.rec_model, "inference.pdmodel") +rec_params_file = os.path.join(args.rec_model, "inference.pdiparams") +rec_label_file = args.rec_label_file + +table_model_file = os.path.join(args.table_model, "inference.pdmodel") +table_params_file = os.path.join(args.table_model, "inference.pdiparams") +table_char_dict_path = args.table_char_dict_path + +# Set the runtime option +det_option, rec_option, table_option = build_option(args) + +det_model = fd.vision.ocr.DBDetector( + det_model_file, det_params_file, runtime_option=det_option) + +rec_model = fd.vision.ocr.Recognizer( + rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option) + +table_model = fd.vision.ocr.StructureV2Table( + table_model_file, + table_params_file, + table_char_dict_path, + runtime_option=table_option) + +det_model.preprocessor.max_side_len = 960 +det_model.postprocessor.det_db_thresh = 0.3 +det_model.postprocessor.det_db_box_thresh = 0.6 +det_model.postprocessor.det_db_unclip_ratio = 1.5 +det_model.postprocessor.det_db_score_mode = "slow" +det_model.postprocessor.use_dilation = False + +ppstructurev2_table = fd.vision.ocr.PPStructureV2Table( + det_model=det_model, rec_model=rec_model, table_model=table_model) + +ppstructurev2_table.rec_batch_size = args.rec_bs + +# Read the input image +im = cv2.imread(args.image) + +# Predict and reutrn the results +result = ppstructurev2_table.predict(im) + +print(result) + +# Visuliaze the results. 
+vis_im = fd.vision.vis_ppocr(im, result) +cv2.imwrite("visualized_result.jpg", vis_im) +print("Visualized result save in ./visualized_result.jpg") diff --git a/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py b/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py new file mode 100755 index 000000000..45344d503 --- /dev/null +++ b/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py @@ -0,0 +1,77 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + parser = argparse.ArgumentParser() + parser.add_argument( + "--table_model", + required=True, + help="Path of Table recognition model of PPOCR.") + parser.add_argument( + "--table_char_dict_path", + type=str, + required=True, + help="tabel recognition dict path.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu' or 'gpu'.") + parser.add_argument( + "--device_id", + type=int, + default=0, + help="Define which GPU card used to run model.") + + return parser.parse_args() + + +def build_option(args): + + table_option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + table_option.use_gpu(args.device_id) + + return table_option + + +args = parse_arguments() + +table_model_file = os.path.join(args.table_model, "inference.pdmodel") +table_params_file = os.path.join(args.table_model, "inference.pdiparams") + +# Set the runtime option +table_option = build_option(args) + +# Create the table_model +table_model = fd.vision.ocr.StructureV2Table( + table_model_file, table_params_file, args.table_char_dict_path, + table_option) + +# Read the image +im = cv2.imread(args.image) + +# Predict and return the results +result = table_model.predict(im) + +print(result) diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index 00dfa7ffa..3800b56f2 100755 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -53,8 +53,10 @@ #include "fastdeploy/vision/matting/ppmatting/ppmatting.h" #include "fastdeploy/vision/ocr/ppocr/classifier.h" #include "fastdeploy/vision/ocr/ppocr/dbdetector.h" +#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h" +#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h" #include 
"fastdeploy/vision/ocr/ppocr/recognizer.h" #include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" #include "fastdeploy/vision/segmentation/ppseg/model.h" diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc old mode 100755 new mode 100644 index aabcc2a1a..a52cc95f6 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -649,6 +649,30 @@ std::string OCRResult::Str() { } out = out + "\n"; } + + if (table_boxes.size() > 0 && table_structure.size() > 0) { + for (int n = 0; n < table_boxes.size(); n++) { + out = out + "table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]"; + } + + out = out + "\ntable structure: "; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: " + table_html; + } + } + return out; } else if (boxes.size() == 0 && rec_scores.size() > 0 && @@ -680,6 +706,31 @@ std::string OCRResult::Str() { out = out + "\n"; } return out; + } else if (boxes.size() == 0 && table_boxes.size() > 0 && + table_structure.size() > 0) { + std::string out; + for (int n = 0; n < table_boxes.size(); n++) { + out = out + ", table boxes: ["; + for (int i = 0; i < 4; i++) { + out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," + + std::to_string(table_boxes[n][i * 2 + 1]) + "]"; + + if (i != 3) { + out = out + ","; + } + } + out = out + "]"; + } + + out = out + "\ntable structure: "; + for (int m = 0; m < table_structure.size(); m++) { + out += table_structure[m]; + } + + if (!table_html.empty()) { + out = out + "\n" + "table html: " + table_html; + } + return out; } no_result = no_result + "No Results!"; diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h
index 8a72f348b..6ef96d140 100755 --- a/fastdeploy/vision/common/result.h +++ b/fastdeploy/vision/common/result.h @@ -216,6 +216,10 @@ struct FASTDEPLOY_DECL OCRResult : public BaseResult { std::vector cls_scores; std::vector cls_labels; + std::vector> table_boxes; + std::vector table_structure; + std::string table_html; + ResultType type = ResultType::OCR; void Clear(); diff --git a/fastdeploy/vision/ocr/ocr_pybind.cc b/fastdeploy/vision/ocr/ocr_pybind.cc index b1e234875..c936a6ab0 100644 --- a/fastdeploy/vision/ocr/ocr_pybind.cc +++ b/fastdeploy/vision/ocr/ocr_pybind.cc @@ -19,11 +19,13 @@ namespace fastdeploy { void BindPPOCRModel(pybind11::module& m); void BindPPOCRv3(pybind11::module& m); void BindPPOCRv2(pybind11::module& m); +void BindPPStructureV2Table(pybind11::module& m); void BindOcr(pybind11::module& m) { auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models"); BindPPOCRModel(ocr_module); BindPPOCRv3(ocr_module); BindPPOCRv2(ocr_module); + BindPPStructureV2Table(ocr_module); } } // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc index ce74ae4c6..019a11f91 100644 --- a/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc @@ -321,5 +321,94 @@ void BindPPOCRModel(pybind11::module& m) { self.BatchPredict(images, &ocr_result); return ocr_result; }); + + // Table + pybind11::class_(m, "StructureV2TablePreprocessor") + .def(pybind11::init<>()) + .def("run", [](vision::ocr::StructureV2TablePreprocessor& self, + std::vector& im_list) { + std::vector images; + for (size_t i = 0; i < im_list.size(); ++i) { + images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); + } + std::vector outputs; + if (!self.Run(&images, &outputs)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "StructureV2TablePreprocessor."); + } + + auto batch_det_img_info = self.GetBatchImgInfo(); + for (size_t i = 0; i < 
outputs.size(); ++i) { + outputs[i].StopSharing(); + } + + return std::make_pair(outputs, *batch_det_img_info); + }); + + pybind11::class_( + m, "StructureV2TablePostprocessor") + .def(pybind11::init()) + .def("run", + [](vision::ocr::StructureV2TablePostprocessor& self, + std::vector& inputs, + const std::vector>& batch_det_img_info) { + std::vector>> boxes; + std::vector> structure_list; + + if (!self.Run(inputs, &boxes, &structure_list, + batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "StructureV2TablePostprocessor."); + } + return std::make_pair(boxes, structure_list); + }) + .def("run", + [](vision::ocr::StructureV2TablePostprocessor& self, + std::vector& input_array, + const std::vector>& batch_det_img_info) { + std::vector inputs; + PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); + std::vector>> boxes; + std::vector> structure_list; + + if (!self.Run(inputs, &boxes, &structure_list, + batch_det_img_info)) { + throw std::runtime_error( + "Failed to preprocess the input data in " + "StructureV2TablePostprocessor."); + } + return std::make_pair(boxes, structure_list); + }); + + pybind11::class_( + m, "StructureV2Table") + .def(pybind11::init()) + .def(pybind11::init<>()) + .def_property_readonly("preprocessor", + &vision::ocr::StructureV2Table::GetPreprocessor) + .def_property_readonly("postprocessor", + &vision::ocr::StructureV2Table::GetPostprocessor) + .def("predict", + [](vision::ocr::StructureV2Table& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult ocr_result; + self.Predict(mat, &ocr_result); + return ocr_result; + }) + .def("batch_predict", [](vision::ocr::StructureV2Table& self, + std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + + std::vector ocr_results; + self.BatchPredict(images, &ocr_results); + return ocr_results; + }); } } // namespace fastdeploy diff 
--git a/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc b/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc old mode 100755 new mode 100644 index 81af3d23e..af5597309 --- a/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc @@ -12,64 +12,96 @@ // See the License for the specific language governing permissions and // limitations under the License. #include + #include "fastdeploy/pybind/main.h" namespace fastdeploy { void BindPPOCRv3(pybind11::module& m) { // PPOCRv3 - pybind11::class_( - m, "PPOCRv3") + pybind11::class_(m, "PPOCRv3") .def(pybind11::init()) .def(pybind11::init()) - .def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize, &pipeline::PPOCRv3::SetClsBatchSize) - .def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize, &pipeline::PPOCRv3::SetRecBatchSize) - .def("clone", [](pipeline::PPOCRv3& self) { - return self.Clone(); - }) - .def("predict", [](pipeline::PPOCRv3& self, - pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::OCRResult res; - self.Predict(&mat, &res); - return res; - }) - .def("batch_predict", [](pipeline::PPOCRv3& self, std::vector& data) { - std::vector images; - for (size_t i = 0; i < data.size(); ++i) { - images.push_back(PyArrayToCvMat(data[i])); - } - std::vector results; - self.BatchPredict(images, &results); - return results; - }); + .def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize, + &pipeline::PPOCRv3::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize, + &pipeline::PPOCRv3::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv3& self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv3& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv3& self, std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + 
images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); } void BindPPOCRv2(pybind11::module& m) { // PPOCRv2 - pybind11::class_( - m, "PPOCRv2") + pybind11::class_(m, "PPOCRv2") .def(pybind11::init()) .def(pybind11::init()) - .def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize, &pipeline::PPOCRv2::SetClsBatchSize) - .def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize, &pipeline::PPOCRv2::SetRecBatchSize) - .def("clone", [](pipeline::PPOCRv2& self) { - return self.Clone(); - }) - .def("predict", [](pipeline::PPOCRv2& self, - pybind11::array& data) { - auto mat = PyArrayToCvMat(data); - vision::OCRResult res; - self.Predict(&mat, &res); - return res; - }) - .def("batch_predict", [](pipeline::PPOCRv2& self, std::vector& data) { + .def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize, + &pipeline::PPOCRv2::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize, + &pipeline::PPOCRv2::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv2& self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv2& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv2& self, std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} + +void BindPPStructureV2Table(pybind11::module& m) { + // PPStructureV2Table + pybind11::class_( + m, "PPStructureV2Table") + .def(pybind11::init()) + .def_property("rec_batch_size", + &pipeline::PPStructureV2Table::GetRecBatchSize, + &pipeline::PPStructureV2Table::SetRecBatchSize) + .def("clone", + [](pipeline::PPStructureV2Table& self) { return self.Clone(); }) + .def("predict", + 
[](pipeline::PPStructureV2Table& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", [](pipeline::PPStructureV2Table& self, + std::vector& data) { std::vector images; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); diff --git a/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc b/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc new file mode 100644 index 000000000..d0b2fbb00 --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc @@ -0,0 +1,233 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h" + +#include "fastdeploy/utils/perf.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace fastdeploy { +namespace pipeline { +PPStructureV2Table::PPStructureV2Table( + fastdeploy::vision::ocr::DBDetector* det_model, + fastdeploy::vision::ocr::Recognizer* rec_model, + fastdeploy::vision::ocr::StructureV2Table* table_model) + : detector_(det_model), recognizer_(rec_model), table_(table_model) { + Initialized(); +} + +bool PPStructureV2Table::SetRecBatchSize(int rec_batch_size) { + if (rec_batch_size < -1 || rec_batch_size == 0) { + FDERROR << "batch_size > 0 or batch_size == -1." 
<< std::endl; + return false; + } + rec_batch_size_ = rec_batch_size; + return true; +} + +int PPStructureV2Table::GetRecBatchSize() { return rec_batch_size_; } + +bool PPStructureV2Table::Initialized() const { + if (detector_ != nullptr && !detector_->Initialized()) { + return false; + } + + if (recognizer_ != nullptr && !recognizer_->Initialized()) { + return false; + } + + if (table_ != nullptr && !table_->Initialized()) { + return false; + } + return true; +} + +std::unique_ptr PPStructureV2Table::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PPStructureV2Table(*this)); + clone_model->detector_ = detector_->Clone().release(); + clone_model->recognizer_ = recognizer_->Clone().release(); + clone_model->table_ = table_->Clone().release(); + return clone_model; +} + +bool PPStructureV2Table::Predict(cv::Mat* img, + fastdeploy::vision::OCRResult* result) { + return Predict(*img, result); +} + +bool PPStructureV2Table::Predict(const cv::Mat& img, + fastdeploy::vision::OCRResult* result) { + std::vector batch_result(1); + bool success = BatchPredict({img}, &batch_result); + if (!success) { + return success; + } + *result = std::move(batch_result[0]); + return true; +}; + +bool PPStructureV2Table::BatchPredict( + const std::vector& images, + std::vector* batch_result) { + batch_result->clear(); + batch_result->resize(images.size()); + std::vector>> batch_boxes(images.size()); + + if (!detector_->BatchPredict(images, &batch_boxes)) { + FDERROR << "There's error while detecting image in PPOCR." 
<< std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + vision::ocr::SortBoxes(&(batch_boxes[i_batch])); + (*batch_result)[i_batch].boxes = batch_boxes[i_batch]; + } + + for (int i_batch = 0; i_batch < images.size(); ++i_batch) { + fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch]; + // Get croped images by detection result + const std::vector>& boxes = ocr_result.boxes; + const cv::Mat& img = images[i_batch]; + std::vector image_list; + if (boxes.size() == 0) { + image_list.emplace_back(img); + } else { + image_list.resize(boxes.size()); + for (size_t i_box = 0; i_box < boxes.size(); ++i_box) { + image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]); + } + } + std::vector* cls_labels_ptr = &ocr_result.cls_labels; + std::vector* cls_scores_ptr = &ocr_result.cls_scores; + + std::vector* text_ptr = &ocr_result.text; + std::vector* rec_scores_ptr = &ocr_result.rec_scores; + + std::vector width_list; + for (int i = 0; i < image_list.size(); i++) { + width_list.push_back(float(image_list[i].cols) / image_list[i].rows); + } + std::vector indices = vision::ocr::ArgSort(width_list); + + for (size_t start_index = 0; start_index < image_list.size(); + start_index += rec_batch_size_) { + size_t end_index = + std::min(start_index + rec_batch_size_, image_list.size()); + if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr, + start_index, end_index, indices)) { + FDERROR << "There's error while recognizing image in PPOCR." + << std::endl; + return false; + } + } + } + + if (!table_->BatchPredict(images, batch_result)) { + FDERROR << "There's error while recognizing tables in images." 
<< std::endl; + return false; + } + + for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) { + fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch]; + std::vector> matched(ocr_result.table_boxes.size(), + std::vector()); + + std::vector ocr_box; + std::vector structure_box; + for (int i = 0; i < ocr_result.boxes.size(); i++) { + ocr_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.boxes[i]); + ocr_box[0] -= 1; + ocr_box[1] -= 1; + ocr_box[2] += 1; + ocr_box[3] += 1; + + std::vector> dis_list(ocr_result.table_boxes.size(), + std::vector(3, 100000.0)); + + for (int j = 0; j < ocr_result.table_boxes.size(); j++) { + structure_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.table_boxes[j]); + dis_list[j][0] = vision::ocr::Dis(ocr_box, structure_box); + dis_list[j][1] = 1 - vision::ocr::Iou(ocr_box, structure_box); + dis_list[j][2] = j; + } + + // find min dis idx + std::sort(dis_list.begin(), dis_list.end(), vision::ocr::ComparisonDis); + matched[dis_list[0][2]].push_back(ocr_result.text[i]); + } + + // get pred html + std::string html_str = ""; + int td_tag_idx = 0; + auto structure_html_tags = ocr_result.table_structure; + for (int i = 0; i < structure_html_tags.size(); i++) { + if (structure_html_tags[i].find("") != std::string::npos) { + if (structure_html_tags[i].find("") != std::string::npos) { + html_str += "") != std::string::npos) { + html_str += ""; + } else { + html_str += structure_html_tags[i]; + } + td_tag_idx += 1; + } else { + html_str += structure_html_tags[i]; + } + } + (*batch_result)[i_batch].table_html = html_str; + } + + return true; +} + +} // namespace pipeline +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h b/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h new file mode 100755 index 000000000..18f28ba34 --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h @@ -0,0 +1,93 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/vision/common/result.h" + +#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h" +#include "fastdeploy/vision/ocr/ppocr/dbdetector.h" +#include "fastdeploy/vision/ocr/ppocr/recognizer.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h" +#include "fastdeploy/utils/unique_ptr.h" + +namespace fastdeploy { +/** \brief This pipeline can launch detection model, classification model and recognition model sequentially. All OCR pipeline APIs are defined inside this namespace. + * + */ +namespace pipeline { +/*! @brief PPStructureV2Table is used to load PP-OCRv2 series models provided by PaddleOCR. + */ +class FASTDEPLOY_DECL PPStructureV2Table : public FastDeployModel { + public: + /** \brief Set up the detection model path, recognition model path and table model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv2_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv2_rec_infer + * \param[in] table_model Path of table recognition model, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer + */ + PPStructureV2Table(fastdeploy::vision::ocr::DBDetector* det_model, + fastdeploy::vision::ocr::Recognizer* rec_model, + fastdeploy::vision::ocr::StructureV2Table* table_model); + + + /** \brief Clone a new PPStructureV2Table with less memory usage when multiple instances of the same model are created + * + * \return new PPStructureV2Table* type unique pointer + */ + std::unique_ptr Clone() const; + + /** \brief Predict the input image and get OCR result. + * + * \param[in] im The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] result The output OCR result will be writen to this structure. + * \return true if the prediction successed, otherwise false. + */ + virtual bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result); + virtual bool Predict(const cv::Mat& img, + fastdeploy::vision::OCRResult* result); + /** \brief BatchPredict the input image and get OCR result. + * + * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] batch_result The output list of OCR result will be writen to this structure. + * \return true if the prediction successed, otherwise false. 
+ */ + virtual bool BatchPredict(const std::vector& images, + std::vector* batch_result); + + bool Initialized() const override; + bool SetRecBatchSize(int rec_batch_size); + int GetRecBatchSize(); + + protected: + fastdeploy::vision::ocr::DBDetector* detector_ = nullptr; + fastdeploy::vision::ocr::Recognizer* recognizer_ = nullptr; + fastdeploy::vision::ocr::StructureV2Table* table_ = nullptr; + + private: + int rec_batch_size_ = 6; +}; + +namespace application { +namespace ocrsystem { + typedef pipeline::PPStructureV2Table PPStructureV2TableSystem; +} // namespace ocrsystem +} // namespace application + +} // namespace pipeline +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table.cc b/fastdeploy/vision/ocr/ppocr/structurev2_table.cc new file mode 100644 index 000000000..2dc9d543d --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table.cc @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h" + +#include "fastdeploy/utils/perf.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace fastdeploy { +namespace vision { +namespace ocr { + +StructureV2Table::StructureV2Table() {} +StructureV2Table::StructureV2Table(const std::string& model_file, + const std::string& params_file, + const std::string& table_char_dict_path, + const RuntimeOption& custom_option, + const ModelFormat& model_format) + : postprocessor_(table_char_dict_path) { + if (model_format == ModelFormat::ONNX) { + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; + valid_gpu_backends = {Backend::ORT, Backend::TRT}; + } else { + valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, + Backend::LITE}; + valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; + valid_kunlunxin_backends = {Backend::LITE}; + valid_ascend_backends = {Backend::LITE}; + valid_sophgonpu_backends = {Backend::SOPHGOTPU}; + valid_rknpu_backends = {Backend::RKNPU2}; + } + + runtime_option = custom_option; + runtime_option.model_format = model_format; + runtime_option.model_file = model_file; + runtime_option.params_file = params_file; + initialized = Initialize(); +} + +// Init +bool StructureV2Table::Initialize() { + if (!InitRuntime()) { + FDERROR << "Failed to initialize fastdeploy backend." 
<< std::endl; + return false; + } + return true; +} + +std::unique_ptr StructureV2Table::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(StructureV2Table(*this)); + clone_model->SetRuntime(clone_model->CloneRuntime()); + return clone_model; +} + +bool StructureV2Table::Predict(const cv::Mat& img, + std::vector>* boxes_result, + std::vector* structure_result) { + std::vector>> det_results; + std::vector> structure_results; + if (!BatchPredict({img}, &det_results, &structure_results)) { + return false; + } + *boxes_result = std::move(det_results[0]); + *structure_result = std::move(structure_results[0]); + return true; +} + +bool StructureV2Table::Predict(const cv::Mat& img, + vision::OCRResult* ocr_result) { + if (!Predict(img, &(ocr_result->table_boxes), + &(ocr_result->table_structure))) { + return false; + } + return true; +} + +bool StructureV2Table::BatchPredict( + const std::vector& images, + std::vector* ocr_results) { + std::vector>> det_results; + std::vector> structure_results; + if (!BatchPredict(images, &det_results, &structure_results)) { + return false; + } + ocr_results->resize(det_results.size()); + for (int i = 0; i < det_results.size(); i++) { + (*ocr_results)[i].table_boxes = std::move(det_results[i]); + (*ocr_results)[i].table_structure = std::move(structure_results[i]); + } + return true; +} + +bool StructureV2Table::BatchPredict( + const std::vector& images, + std::vector>>* det_results, + std::vector>* structure_results) { + std::vector fd_images = WrapMat(images); + if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { + FDERROR << "Failed to preprocess input image." << std::endl; + return false; + } + auto batch_det_img_info = preprocessor_.GetBatchImgInfo(); + + reused_input_tensors_[0].name = InputInfoOfRuntime(0).name; + if (!Infer(reused_input_tensors_, &reused_output_tensors_)) { + FDERROR << "Failed to inference by runtime." 
<< std::endl; + return false; + } + + if (!postprocessor_.Run(reused_output_tensors_, det_results, + structure_results, *batch_det_img_info)) { + FDERROR << "Failed to postprocess the inference cls_results by runtime." + << std::endl; + return false; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table.h b/fastdeploy/vision/ocr/ppocr/structurev2_table.h new file mode 100755 index 000000000..2d8db1c5f --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table.h @@ -0,0 +1,113 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/fastdeploy_model.h" +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/vision/common/result.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h" +#include "fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h" +#include "fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h" +#include "fastdeploy/utils/unique_ptr.h" + +namespace fastdeploy { +namespace vision { +/** \brief All OCR series model APIs are defined inside this namespace + * + */ +namespace ocr { + +/*! @brief DBDetector object is used to load the detection model provided by PaddleOCR. 
+ */ +class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel { + public: + StructureV2Table(); + /** \brief Set path of model file, and the configuration of runtime + * + * \param[in] model_file Path of model file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel. + * \param[in] params_file Path of parameter file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + * \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`. + * \param[in] model_format Model format of the loaded model, default is Paddle format. + */ + StructureV2Table(const std::string& model_file, + const std::string& params_file = "", + const std::string& table_char_dict_path = "", + const RuntimeOption& custom_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE); + + /** \brief Clone a new StructureV2Table Recognizer with less memory usage when multiple instances of the same model are created + * + * \return new StructureV2Table* type unique pointer + */ + virtual std::unique_ptr Clone() const; + + /// Get model's name + std::string ModelName() const { return "ppocr/ocr_table"; } + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] boxes_result The output of OCR detection model result will be writen to this structure. + * \return true if the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat& img, + std::vector>* boxes_result, + std::vector* structure_result); + + /** \brief Predict the input image and get OCR detection model result. + * + * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. 
+ * \param[in] ocr_result The output of OCR detection model result will be writen to this structure. + * \return true if the prediction is successed, otherwise false. + */ + virtual bool Predict(const cv::Mat& img, vision::OCRResult* ocr_result); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] det_results The output of OCR detection model result will be writen to this structure. + * \return true if the prediction is successed, otherwise false. + */ + virtual bool BatchPredict(const std::vector& images, + std::vector>>* det_results, + std::vector>* structure_results); + + /** \brief BatchPredict the input image and get OCR detection model result. + * + * \param[in] images The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format. + * \param[in] ocr_results The output of OCR detection model result will be writen to this structure. + * \return true if the prediction is successed, otherwise false. 
+ */ + virtual bool BatchPredict(const std::vector& images, + std::vector* ocr_results); + + /// Get preprocessor reference of StructureV2TablePreprocessor + virtual StructureV2TablePreprocessor& GetPreprocessor() { + return preprocessor_; + } + + /// Get postprocessor reference of StructureV2TablePostprocessor + virtual StructureV2TablePostprocessor& GetPostprocessor() { + return postprocessor_; + } + + private: + bool Initialize(); + StructureV2TablePreprocessor preprocessor_; + StructureV2TablePostprocessor postprocessor_; +}; + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc new file mode 100644 index 000000000..238da28b3 --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h" + +#include "fastdeploy/utils/perf.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace fastdeploy { +namespace vision { +namespace ocr { + +StructureV2TablePostprocessor::StructureV2TablePostprocessor() { + initialized_ = false; +} + +StructureV2TablePostprocessor::StructureV2TablePostprocessor( + const std::string& dict_path) { + std::ifstream in(dict_path); + FDASSERT(in, "Cannot open file %s to read.", dict_path.c_str()); + std::string line; + dict_character.clear(); + dict_character.push_back("sos"); // add special character + while (getline(in, line)) { + dict_character.push_back(line); + } + + if (merge_no_span_structure) { + if (std::find(dict_character.begin(), dict_character.end(), "") == + dict_character.end()) { + dict_character.push_back(""); + } + for (auto it = dict_character.begin(); it != dict_character.end();) { + if (*it == ""}; + std::map dict; + int ignore_beg_token_idx; + int ignore_end_token_idx; + int dict_end_idx; + bool initialized_ = false; +}; + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.cc b/fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.cc new file mode 100644 index 000000000..e03b200b2 --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.cc @@ -0,0 +1,105 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h" + +#include "fastdeploy/function/concat.h" +#include "fastdeploy/utils/perf.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace fastdeploy { +namespace vision { +namespace ocr { + +StructureV2TablePreprocessor::StructureV2TablePreprocessor() { + resize_op_ = std::make_shared(-1, -1); + + std::vector value = {0, 0, 0}; + pad_op_ = std::make_shared(0, 0, 0, 0, value); + + std::vector mean = {0.485f, 0.456f, 0.406f}; + std::vector std = {0.229f, 0.224f, 0.225f}; + normalize_op_ = std::make_shared(mean, std, true); + hwc2chw_op_ = std::make_shared(); +} + +void StructureV2TablePreprocessor::StructureV2TableResizeImage(FDMat* mat, + int batch_idx) { + float img_h = float(rec_image_shape_[1]); + float img_w = float(rec_image_shape_[2]); + float width = float(mat->Width()); + float height = float(mat->Height()); + float ratio = max_len / (std::max(height, width) * 1.0); + int resize_h = int(height * ratio); + int resize_w = int(width * ratio); + + resize_op_->SetWidthAndHeight(resize_w, resize_h); + (*resize_op_)(mat); + + (*normalize_op_)(mat); + pad_op_->SetPaddingSize(0, int(max_len - resize_h), 0, + int(max_len - resize_w)); + (*pad_op_)(mat); + + (*hwc2chw_op_)(mat); + batch_det_img_info_[batch_idx] = {int(width), int(height), resize_w, + resize_h}; +} + +bool StructureV2TablePreprocessor::Run(std::vector* images, + std::vector* outputs, + size_t start_index, size_t end_index, + const std::vector& indices) { + if (images->size() == 0 || end_index <= start_index || + end_index > images->size()) { + FDERROR << "images->size() or index error. 
Correct is: 0 <= start_index < " + "end_index <= images->size()" + << std::endl; + return false; + } + + std::vector mats(end_index - start_index); + for (size_t i = start_index; i < end_index; ++i) { + size_t real_index = i; + if (indices.size() != 0) { + real_index = indices[i]; + } + mats[i - start_index] = images->at(real_index); + } + return Run(&mats, outputs); +} + +bool StructureV2TablePreprocessor::Apply(FDMatBatch* image_batch, + std::vector* outputs) { + batch_det_img_info_.clear(); + batch_det_img_info_.resize(image_batch->mats->size()); + for (size_t i = 0; i < image_batch->mats->size(); ++i) { + FDMat* mat = &(image_batch->mats->at(i)); + StructureV2TableResizeImage(mat, i); + } + + // Only have 1 output Tensor. + outputs->resize(1); + // Get the NCHW tensor + FDTensor* tensor = image_batch->Tensor(); + (*outputs)[0].SetExternalData(tensor->Shape(), tensor->Dtype(), + tensor->Data(), tensor->device, + tensor->device_id); + + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h b/fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h new file mode 100644 index 000000000..9e8a03fcc --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/vision/common/processors/manager.h" +#include "fastdeploy/vision/common/result.h" + +namespace fastdeploy { +namespace vision { + +namespace ocr { +/*! @brief Preprocessor object for table model. + */ +class FASTDEPLOY_DECL StructureV2TablePreprocessor : public ProcessorManager { + public: + StructureV2TablePreprocessor(); + using ProcessorManager::Run; + /** \brief Process the input image and prepare input tensors for runtime + * + * \param[in] images The input data list, all the elements are FDMat + * \param[in] outputs The output tensors which will be fed into runtime + * \return true if the preprocess successed, otherwise false + */ + bool Run(std::vector* images, std::vector* outputs, + size_t start_index, size_t end_index, + const std::vector& indices); + + /** \brief Implement the virtual function of ProcessorManager, Apply() is the + * body of Run(). Apply() contains the main logic of preprocessing, Run() is + * called by users to execute preprocessing + * + * \param[in] image_batch The input image batch + * \param[in] outputs The output tensors which will feed in runtime + * \return true if the preprocess successed, otherwise false + */ + virtual bool Apply(FDMatBatch* image_batch, std::vector* outputs); + + /// Get the image info of the last batch, return a list of array + /// {image width, image height, resize width, resize height} + const std::vector>* GetBatchImgInfo() { + return &batch_det_img_info_; + } + + private: + void StructureV2TableResizeImage(FDMat* mat, int batch_idx); + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; + int max_len = 488; + std::vector rec_image_shape_ = {3, max_len, max_len}; + bool static_shape_infer_ = false; + std::shared_ptr resize_op_; + std::shared_ptr pad_op_; + std::shared_ptr normalize_op_; + std::shared_ptr 
hwc2chw_op_; + std::vector> batch_det_img_info_; +}; + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/utils/matcher.cc b/fastdeploy/vision/ocr/ppocr/utils/matcher.cc new file mode 100644 index 000000000..7fa397bed --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/utils/matcher.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h" + +namespace fastdeploy { +namespace vision { +namespace ocr { + +std::vector Xyxyxyxy2Xyxy(std::array &box) { + int x_collect[4] = {box[0], box[2], box[4], box[6]}; + int y_collect[4] = {box[1], box[3], box[5], box[7]}; + int left = int(*std::min_element(x_collect, x_collect + 4)); + int right = int(*std::max_element(x_collect, x_collect + 4)); + int top = int(*std::min_element(y_collect, y_collect + 4)); + int bottom = int(*std::max_element(y_collect, y_collect + 4)); + std::vector box1(4, 0); + box1[0] = left; + box1[1] = top; + box1[2] = right; + box1[3] = bottom; + return box1; +} + +float Dis(std::vector &box1, std::vector &box2) { + float x1_1 = float(box1[0]); + float y1_1 = float(box1[1]); + float x2_1 = float(box1[2]); + float y2_1 = float(box1[3]); + + float x1_2 = float(box2[0]); + float y1_2 = float(box2[1]); + float x2_2 = float(box2[2]); + float y2_2 = float(box2[3]); + + float dis = std::abs(x1_2 - x1_1) + 
std::abs(y1_2 - y1_1) + + std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1); + float dis_2 = std::abs(x1_2 - x1_1) + std::abs(y1_2 - y1_1); + float dis_3 = std::abs(x2_2 - x2_1) + std::abs(y2_2 - y2_1); + return dis + std::min(dis_2, dis_3); +} + +float Iou(std::vector &box1, std::vector &box2) { + int area1 = std::max(0, box1[2] - box1[0]) * std::max(0, box1[3] - box1[1]); + int area2 = std::max(0, box2[2] - box2[0]) * std::max(0, box2[3] - box2[1]); + + // computing the sum_area + int sum_area = area1 + area2; + + // find the each point of intersect rectangle + int x1 = std::max(box1[0], box2[0]); + int y1 = std::max(box1[1], box2[1]); + int x2 = std::min(box1[2], box2[2]); + int y2 = std::min(box1[3], box2[3]); + + // judge if there is an intersect + if (y1 >= y2 || x1 >= x2) { + return 0.0; + } else { + int intersect = (x2 - x1) * (y2 - y1); + return intersect / (sum_area - intersect + 0.00000001); + } +} + +bool ComparisonDis(const std::vector &dis1, + const std::vector &dis2) { + if (dis1[1] < dis2[1]) { + return true; + } else if (dis1[1] == dis2[1]) { + return dis1[0] < dis2[0]; + } else { + return false; + } +} + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h b/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h index 101926cb5..07ff854a3 100755 --- a/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h +++ b/fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h @@ -34,6 +34,15 @@ FASTDEPLOY_DECL void SortBoxes(std::vector>* boxes); FASTDEPLOY_DECL std::vector ArgSort(const std::vector &array); +FASTDEPLOY_DECL std::vector Xyxyxyxy2Xyxy(std::array &box); + +FASTDEPLOY_DECL float Dis(std::vector &box1, std::vector &box2); + +FASTDEPLOY_DECL float Iou(std::vector &box1, std::vector &box2); + +FASTDEPLOY_DECL bool ComparisonDis(const std::vector &dis1, + const std::vector &dis2); + } // namespace ocr } // namespace vision } // namespace fastdeploy diff --git 
a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py index 0cbb6385f..30dcf8a83 100755 --- a/python/fastdeploy/vision/ocr/ppocr/__init__.py +++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py @@ -648,6 +648,107 @@ class Recognizer(FastDeployModel): self._model.preprocessor.rec_image_shape = value +class StructureV2TablePreprocessor: + def __init__(self): + """Create a preprocessor for StructureV2TableModel + """ + self._preprocessor = C.vision.ocr.StructureV2TablePreprocessor() + + def run(self, input_ims): + """Preprocess input images for StructureV2TableModel + :param: input_ims: (list of numpy.ndarray)The input image + :return: list of FDTensor + """ + return self._preprocessor.run(input_ims) + + +class StructureV2TablePostprocessor: + def __init__(self): + """Create a postprocessor for StructureV2TableModel + """ + self._postprocessor = C.vision.ocr.StructureV2TablePostprocessor() + + def run(self, runtime_results): + """Postprocess the runtime results for StructureV2TableModel + :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime + :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size) + """ + return self._postprocessor.run(runtime_results) + + +class StructureV2Table(FastDeployModel): + def __init__(self, + model_file="", + params_file="", + table_char_dict_path="", + runtime_option=None, + model_format=ModelFormat.PADDLE): + """Load OCR StructureV2Table model provided by PaddleOCR. + + :param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel. + :param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored. + :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU. 
+ :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model. + """ + super(StructureV2Table, self).__init__(runtime_option) + + if (len(model_file) == 0): + self._model = C.vision.ocr.StructureV2Table() + self._runnable = False + else: + self._model = C.vision.ocr.StructureV2Table( + model_file, params_file, table_char_dict_path, + self._runtime_option, model_format) + assert self.initialized, "Classifier initialize failed." + self._runnable = True + + def clone(self): + """Clone OCR StructureV2Table model object + :return: a new OCR StructureV2Table model object + """ + + class StructureV2TableClone(StructureV2Table): + def __init__(self, model): + self._model = model + + clone_model = StructureV2TableClone(self._model.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: bbox, structure + """ + if self._runnable: + return self._model.predict(input_image) + return False + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: list of bbox list, list of structure + """ + if self._runnable: + return self._model.batch_predict(images) + return False + + @property + def preprocessor(self): + return self._model.preprocessor + + @preprocessor.setter + def preprocessor(self, value): + self._model.preprocessor = value + + @property + def postprocessor(self): + return self._model.postprocessor + + @postprocessor.setter + def postprocessor(self, value): + self._model.postprocessor = value + + class PPOCRv3(FastDeployModel): def __init__(self, det_model=None, cls_model=None, rec_model=None): """Consruct a pipeline with text detector, direction classifier and text recognizer models @@ -800,3 +901,58 @@ class PPOCRSystemv2(PPOCRv2): def predict(self, input_image): 
return super(PPOCRSystemv2, self).predict(input_image) + + +class PPStructureV2Table(FastDeployModel): + def __init__(self, det_model=None, rec_model=None, table_model=None): + """Consruct a pipeline with text detector, text recognizer and table recognizer models + + :param det_model: (FastDeployModel) The detection model object created by fastdeploy.vision.ocr.DBDetector. + :param rec_model: (FastDeployModel) The recognition model object created by fastdeploy.vision.ocr.Recognizer. + :param table_model: (FastDeployModel) The table recognition model object created by fastdeploy.vision.ocr.Table. + """ + assert det_model is not None and rec_model is not None and table_model is not None, "The det_model, rec_model and table_model cannot be None." + self.system_ = C.vision.ocr.PPStructureV2Table( + det_model._model, + rec_model._model, + table_model._model, ) + + def clone(self): + """Clone PPStructureV2Table pipeline object + :return: a new PPStructureV2Table pipeline object + """ + + class PPStructureV2TableClone(PPStructureV2Table): + def __init__(self, system): + self.system_ = system + + clone_model = PPStructureV2TableClone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + + :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input image + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + + return self.system_.batch_predict(images) + + +class PPStructureV2TableSystem(PPStructureV2Table): + def __init__(self, det_model=None, rec_model=None, table_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPStructureV2TableSystem is deprecated, " + "please use fd.vision.ocr.PPStructureV2Table instead.") + 
super(PPStructureV2TableSystem, self).__init__(det_model, rec_model, + table_model) + + def predict(self, input_image): + return super(PPStructureV2TableSystem, self).predict(input_image)
"; + } + if (matched[td_tag_idx].size() > 0) { + bool b_with = false; + if (matched[td_tag_idx][0].find("") != std::string::npos && + matched[td_tag_idx].size() > 1) { + b_with = true; + html_str += ""; + } + for (int j = 0; j < matched[td_tag_idx].size(); j++) { + std::string content = matched[td_tag_idx][j]; + if (matched[td_tag_idx].size() > 1) { + // remove blank, and + if (content.length() > 0 && content.at(0) == ' ') { + content = content.substr(0); + } + if (content.length() > 2 && content.substr(0, 3) == "") { + content = content.substr(3); + } + if (content.length() > 4 && + content.substr(content.length() - 4) == "") { + content = content.substr(0, content.length() - 4); + } + if (content.empty()) { + continue; + } + // add blank + if (j != matched[td_tag_idx].size() - 1 && + content.at(content.length() - 1) != ' ') { + content += ' '; + } + } + html_str += content; + } + if (b_with) { + html_str += ""; + } + } + if (structure_html_tags[i].find("") { + it = dict_character.erase(it); + } else { + ++it; + } + } + } + + dict_character.push_back("eos"); // add special character + dict.clear(); + for (size_t i = 0; i < dict_character.size(); i++) { + dict[dict_character[i]] = int(i); + if (dict_character[i] == "beg") { + ignore_beg_token_idx = i; + } else if (dict_character[i] == "end") { + ignore_end_token_idx = i; + } + } + dict_end_idx = dict_character.size() - 1; + + initialized_ = true; +} + +bool StructureV2TablePostprocessor::SingleBatchPostprocessor( + const float* structure_probs, const float* bbox_preds, size_t slice_dim, + size_t prob_dim, size_t box_dim, int img_width, int img_height, + std::vector>* boxes_result, + std::vector* structure_list_result) { + structure_list_result->push_back(""); + structure_list_result->push_back(""); + structure_list_result->push_back(""); + + for (int i = 0; i < slice_dim; i++) { + int structure_idx = 0; + float structure_prob = structure_probs[i * prob_dim]; + for (int j = 0; j < prob_dim; j++) { + if 
(structure_probs[i * prob_dim + j] > structure_prob) { + structure_prob = structure_probs[i * prob_dim + j]; + structure_idx = j; + } + } + + if (structure_idx > 0 && structure_idx == dict_end_idx) break; + + if (structure_idx == ignore_end_token_idx || + structure_idx == ignore_beg_token_idx) + continue; + + std::string text = dict_character[structure_idx]; + if (std::find(td_tokens.begin(), td_tokens.end(), text) != + td_tokens.end()) { + std::array bbox; + // box dim: en->4, ch->8 + if (box_dim == 4) { + bbox[0] = bbox_preds[i * box_dim] * img_width; + bbox[1] = bbox_preds[i * box_dim + 1] * img_height; + + bbox[2] = bbox_preds[i * box_dim + 2] * img_width; + bbox[3] = bbox_preds[i * box_dim + 1] * img_height; + + bbox[4] = bbox_preds[i * box_dim + 2] * img_width; + bbox[5] = bbox_preds[i * box_dim + 3] * img_height; + + bbox[6] = bbox_preds[i * box_dim] * img_width; + bbox[7] = bbox_preds[i * box_dim + 3] * img_height; + } else { + for (int k = 0; k < 8; k++) { + float bbox_pred = bbox_preds[i * box_dim + k]; + bbox[k] = + int(k % 2 == 0 ? bbox_pred * img_width : bbox_pred * img_height); + } + } + + boxes_result->push_back(bbox); + } + structure_list_result->push_back(text); + } + structure_list_result->push_back("
"); + structure_list_result->push_back(""); + structure_list_result->push_back(""); + + return true; +} + +bool StructureV2TablePostprocessor::Run( + const std::vector& tensors, + std::vector>>* bbox_batch_list, + std::vector>* structure_batch_list, + const std::vector>& batch_det_img_info) { + // Table have 2 output tensors. + const FDTensor& structure_probs = tensors[1]; + const FDTensor& bbox_preds = tensors[0]; + + const float* structure_probs_data = + reinterpret_cast(structure_probs.Data()); + size_t structure_probs_length = + accumulate(structure_probs.shape.begin() + 1, structure_probs.shape.end(), + 1, std::multiplies()); + + const float* bbox_preds_data = + reinterpret_cast(bbox_preds.Data()); + size_t bbox_preds_length = + accumulate(bbox_preds.shape.begin() + 1, bbox_preds.shape.end(), 1, + std::multiplies()); + size_t batch = bbox_preds.shape[0]; + size_t slice_dim = bbox_preds.shape[1]; + size_t prob_dim = structure_probs.shape[2]; + size_t box_dim = bbox_preds.shape[2]; + + bbox_batch_list->resize(batch); + structure_batch_list->resize(batch); + + for (int i_batch = 0; i_batch < batch; ++i_batch) { + SingleBatchPostprocessor( + structure_probs_data, bbox_preds_data, slice_dim, prob_dim, box_dim, + batch_det_img_info[i_batch][0], batch_det_img_info[i_batch][1], + &bbox_batch_list->at(i_batch), &structure_batch_list->at(i_batch)); + structure_probs_data = structure_probs_data + structure_probs_length; + bbox_preds_data = bbox_preds_data + bbox_preds_length; + } + return true; +} + +} // namespace ocr +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h new file mode 100644 index 000000000..a617e068c --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h @@ -0,0 +1,71 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "fastdeploy/vision/common/processors/transform.h" +#include "fastdeploy/vision/common/result.h" +#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h" + +namespace fastdeploy { +namespace vision { + +namespace ocr { +/*! @brief Postprocessor object for DBDetector serials model. + */ +class FASTDEPLOY_DECL StructureV2TablePostprocessor { + public: + StructureV2TablePostprocessor(); + /** \brief Create a postprocessor instance for Recognizer serials model + * + * \param[in] label_path The path of label_dict + */ + explicit StructureV2TablePostprocessor(const std::string& dict_path); + + /** \brief Process the result of runtime and fill to RecognizerResult + * + * \param[in] tensors The inference result from runtime + * \param[in] texts The output text results of recognizer + * \param[in] rec_scores The output score results of recognizer + * \return true if the postprocess successed, otherwise false + */ + bool Run(const std::vector& tensors, + std::vector>>* bbox_batch_list, + std::vector>* structure_batch_list, + const std::vector>& batch_det_img_info); + + private: + PostProcessor util_post_processor_; + bool SingleBatchPostprocessor(const float* structure_probs, + const float* bbox_preds, + size_t slice_dim, + size_t prob_dim, + size_t box_dim, + int img_width, + int img_height, + std::vector>* boxes_result, + std::vector* structure_list_result); + + bool 
merge_no_span_structure{true}; + std::vector dict_character; + std::vector td_tokens{"
", "