mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Hackthon_4th 242] Support en_ppstructure_mobile_v2.0_SLANet (#1816)
* first draft * update api name * fix bug * fix bug and * fix bug in c api * fix bug in c_api --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
@@ -22,6 +22,7 @@ add_executable(benchmark_ppmatting ${PROJECT_SOURCE_DIR}/benchmark_ppmatting.cc)
|
|||||||
add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
|
add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
|
||||||
add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
|
add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
|
||||||
add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
|
add_executable(benchmark_ppocr_rec ${PROJECT_SOURCE_DIR}/benchmark_ppocr_rec.cc)
|
||||||
|
add_executable(benchmark_structurev2_table ${PROJECT_SOURCE_DIR}/benchmark_structurev2_table.cc)
|
||||||
add_executable(benchmark_ppyoloe_r ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r.cc)
|
add_executable(benchmark_ppyoloe_r ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r.cc)
|
||||||
add_executable(benchmark_ppyoloe_r_sophgo ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r_sophgo.cc)
|
add_executable(benchmark_ppyoloe_r_sophgo ${PROJECT_SOURCE_DIR}/benchmark_ppyoloe_r_sophgo.cc)
|
||||||
add_executable(benchmark_ppyolo ${PROJECT_SOURCE_DIR}/benchmark_ppyolo.cc)
|
add_executable(benchmark_ppyolo ${PROJECT_SOURCE_DIR}/benchmark_ppyolo.cc)
|
||||||
@@ -55,6 +56,7 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
|
|||||||
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
|
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
|
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
|
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
|
target_link_libraries(benchmark_structurev2_table ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags pthread)
|
target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags pthread)
|
target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags pthread)
|
target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags pthread)
|
||||||
@@ -85,6 +87,7 @@ else()
|
|||||||
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
|
||||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
|
||||||
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
|
||||||
|
target_link_libraries(benchmark_structurev2_table ${FASTDEPLOY_LIBS} gflags)
|
||||||
target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_ppyolo ${FASTDEPLOY_LIBS} gflags)
|
||||||
target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_yolov3 ${FASTDEPLOY_LIBS} gflags)
|
||||||
target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_fasterrcnn ${FASTDEPLOY_LIBS} gflags)
|
||||||
|
@@ -44,6 +44,7 @@ fi
|
|||||||
./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH
|
./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH
|
||||||
./benchmark_ppocr_det --model ch_PP-OCRv2_det_infer --image 12.jpg --config_path $CONFIG_PATH
|
./benchmark_ppocr_det --model ch_PP-OCRv2_det_infer --image 12.jpg --config_path $CONFIG_PATH
|
||||||
./benchmark_ppocr_rec --model ch_PP-OCRv2_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH
|
./benchmark_ppocr_rec --model ch_PP-OCRv2_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH
|
||||||
|
# PP-StructureV2 table model (SLANet). The binary name must match the
# benchmark_structurev2_table target added in the benchmark CMakeLists.
./benchmark_structurev2_table --model en_ppstructure_mobile_v2.0_SLANet_infer --image table.jpg --table_char_dict_path table_structure_dict.txt --config_path $CONFIG_PATH
|
||||||
|
|
||||||
# PaddleDetection
|
# PaddleDetection
|
||||||
./benchmark_ppyolov5 --model yolov5_s_300e_coco --image 000000014439.jpg --config_path $CONFIG_PATH
|
./benchmark_ppyolov5 --model yolov5_s_300e_coco --image 000000014439.jpg --config_path $CONFIG_PATH
|
||||||
|
161
benchmark/cpp/benchmark_structurev2_table.cc
Executable file
161
benchmark/cpp/benchmark_structurev2_table.cc
Executable file
@@ -0,0 +1,161 @@
|
|||||||
|
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "flags.h"
|
||||||
|
#include "macros.h"
|
||||||
|
#include "option.h"
|
||||||
|
|
||||||
|
namespace vision = fastdeploy::vision;
|
||||||
|
namespace benchmark = fastdeploy::benchmark;
|
||||||
|
|
||||||
|
// Dictionary file handed to the StructureV2Table constructor in main().
DEFINE_string(table_char_dict_path, "",
              "Path of table character dict of PPOCR.");
// Min/opt/max shapes, ':'-separated, applied to input "x" when the configured
// backend is trt or paddle_trt.
// NOTE(review): a default height of 48 looks unusual for a table-structure
// model -- confirm these defaults suit en_ppstructure_mobile_v2.0_SLANet.
DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,2304",
              "Set min/opt/max shape for trt/paddle_trt backend. "
              "eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304");
|
||||||
|
|
||||||
|
// Benchmark driver for vision::ocr::StructureV2Table.
//
// Builds a RuntimeOption from the command line, reads the benchmark config
// file, loads the table model and, when the config sets
// "precision_compare" to "true", compares one prediction against the
// hard-coded reference below before handing the predict call to
// BENCHMARK_MODEL.
//
// Returns 0 on success (or when the benchmark is compiled out) and -1 when
// option creation, image loading, model resource resolution, TensorRT shape
// parsing or the first prediction fails.
int main(int argc, char *argv[]) {
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option, argc, argv, true)) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
  if (im.empty()) {
    // cv::imread signals failure with an empty Mat instead of an error.
    std::cerr << "Failed to read image: " << FLAGS_image << std::endl;
    return -1;
  }
  std::unordered_map<std::string, std::string> config_info;
  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
                                                &config_info);
  std::string model_name, params_name, config_name;
  auto model_format = fastdeploy::ModelFormat::PADDLE;
  if (!UpdateModelResourceName(&model_name, &params_name, &config_name,
                               &model_format, config_info, false)) {
    return -1;
  }
  auto model_file = FLAGS_model + sep + model_name;
  auto params_file = FLAGS_model + sep + params_name;
  if (config_info["backend"] == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
  if (config_info["backend"] == "paddle_trt" ||
      config_info["backend"] == "trt") {
    std::vector<std::vector<int32_t>> trt_shapes =
        benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
    // Three shapes (min, opt, max) are indexed below; reject anything shorter
    // instead of reading past the end of the vector.
    if (trt_shapes.size() < 3) {
      std::cerr << "--trt_shape must contain min:opt:max shapes, got: "
                << FLAGS_trt_shape << std::endl;
      return -1;
    }
    option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
                               trt_shapes[2]);
  }

  auto model_ppocr_table = vision::ocr::StructureV2Table(
      model_file, params_file, FLAGS_table_char_dict_path, option,
      model_format);
  fastdeploy::vision::OCRResult result;

  if (config_info["precision_compare"] == "true") {
    // Reference structure tokens, concatenated into a single HTML string.
    std::string expect_structure_html =
        "<html><body><table><thead><tr><td></td><td></td><td></td><td></"
        "td><td></td></tr></thead><tbody><tr><td></td><td></td><td></td><td></"
        "td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></"
        "tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td></"
        "td><td></td><td></td><td></td><td></td></tr><tr><td></td><td></"
        "td><td></td><td></td><td></td></tr><tr><td></td><td></td><td></"
        "td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></"
        "td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></"
        "tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td></"
        "td><td></td><td></td><td></td><td></td></tr><tr><td></td><td></"
        "td><td></td><td></td><td></td></tr><tr><td></td><td></td><td></"
        "td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></"
        "td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></"
        "tr><tr><td></td><td></td><td></td><td></td><td></td></tr></tbody></"
        "table></body></html>";
    // Reference boxes, four values (x1 y1 x2 y2) per cell.
    std::vector<int> expect_box_coord{
        41,  4,   97,  18,  161, 4,   173, 18,  216, 4,   225, 17,  272, 4,
        283, 17,  321, 4,   348, 18,  33,  20,  106, 38,  150, 22,  180, 38,
        202, 22,  235, 38,  262, 21,  293, 38,  326, 23,  343, 37,  27,  38,
        109, 56,  150, 39,  179, 56,  204, 39,  236, 56,  263, 39,  292, 55,
        329, 40,  343, 54,  22,  57,  118, 74,  152, 58,  176, 74,  204, 58,
        236, 75,  262, 58,  291, 74,  326, 58,  344, 74,  27,  75,  119, 92,
        150, 75,  177, 92,  204, 75,  235, 92,  260, 75,  292, 92,  326, 75,
        346, 92,  44,  92,  102, 110, 150, 92,  177, 110, 205, 92,  236, 110,
        262, 92,  290, 110, 329, 93,  339, 110, 41,  109, 102, 128, 151, 110,
        175, 128, 205, 110, 236, 128, 262, 110, 291, 127, 329, 110, 338, 127,
        42,  128, 102, 146, 149, 128, 177, 146, 205, 128, 237, 146, 262, 128,
        291, 146, 329, 128, 339, 145, 31,  145, 110, 163, 150, 145, 178, 163,
        206, 145, 237, 164, 262, 145, 292, 163, 324, 145, 342, 162, 40,  162,
        108, 180, 154, 162, 175, 180, 209, 162, 231, 180, 266, 162, 286, 180,
        325, 162, 341, 179, 38,  180, 105, 197, 152, 180, 177, 197, 207, 180,
        236, 197, 262, 180, 291, 197, 329, 181, 339, 196, 42,  196, 102, 214,
        151, 197, 179, 214, 205, 197, 236, 214, 263, 197, 291, 214, 320, 197,
        349, 214, 46,  215, 100, 233, 149, 216, 179, 233, 204, 216, 238, 233,
        262, 216, 291, 233, 321, 216, 345, 232, 42,  233, 104, 251, 147, 234,
        179, 251, 203, 233, 237, 251, 260, 233, 294, 251, 326, 234, 341, 250,
        19,  251, 120, 269, 148, 253, 180, 270, 202, 252, 240, 270, 259, 252,
        294, 270, 324, 252, 347, 268, 16,  270, 123, 286, 146, 270, 182, 287,
        200, 270, 238, 287, 256, 270, 294, 286, 319, 270, 353, 286};

    // Run once at least
    if (!model_ppocr_table.Predict(im, &result)) {
      std::cerr << "Failed to predict." << std::endl;
      return -1;
    }

    // 1. Test result diff
    std::cout << "=============== Test Table Result diff =================\n";
    // Calculate diff between two results.
    std::string result_table_structure;
    for (auto &structure : result.table_structure) {
      result_table_structure += structure;
    }
    if (expect_structure_html == result_table_structure) {
      std::cout << "PPOCR Table structure has no diff" << std::endl;
    } else {
      std::cout << "PPOCR Table structure has diff" << std::endl;
      std::cout << "expected: " << expect_structure_html << std::endl;
      std::cout << "result: " << result_table_structure << std::endl;
    }

    std::vector<int> table_box_coord;
    for (auto &box : result.table_boxes) {
      // x1 y1 x2 y1 x2 y2 x1 y2 => x1 y1 x2 y2
      table_box_coord.push_back(box[0]);
      table_box_coord.push_back(box[1]);
      table_box_coord.push_back(box[2]);
      table_box_coord.push_back(box[5]);
    }

    if (expect_box_coord.size() == table_box_coord.size()) {
      std::cout << "table boxes num matched with expected: "
                << table_box_coord.size() << std::endl;
      int max_diff = 0;
      int total_diff = 0;
      for (size_t i = 0; i < table_box_coord.size(); i++) {
        int diff = std::abs(table_box_coord[i] - expect_box_coord[i]);
        if (diff > max_diff) {
          max_diff = diff;
        }
        total_diff += diff;
      }
      std::cout << "box coords, max_diff: " << max_diff
                << ", total diff: " << total_diff << ", average diff: "
                << total_diff / float(table_box_coord.size()) << std::endl;
    } else {
      // Four coordinates are stored per box, hence the division by 4.
      std::cout << "boxes num has diff, expect box num: "
                << expect_box_coord.size() / 4
                << ", result box num:" << table_box_coord.size() / 4
                << std::endl;
    }
  }

  BENCHMARK_MODEL(model_ppocr_table, model_ppocr_table.Predict(im, &result));
#endif
  return 0;
}
|
@@ -212,6 +212,7 @@ download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP
|
|||||||
download_common_model_xvf https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar ch_ppocr_mobile_v2.0_cls_infer.tar
|
download_common_model_xvf https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar ch_ppocr_mobile_v2.0_cls_infer.tar
|
||||||
download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar ch_PP-OCRv2_det_infer.tar
|
download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar ch_PP-OCRv2_det_infer.tar
|
||||||
download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ch_PP-OCRv2_rec_infer.tar
|
download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ch_PP-OCRv2_rec_infer.tar
|
||||||
|
# SLANet table model; pass the archive name as the second argument like the
# other download_common_model_xvf calls in this script.
download_common_model_xvf https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar en_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||||
|
|
||||||
# download images
|
# download images
|
||||||
download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/rec_img.jpg rec_img.jpg
|
download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/rec_img.jpg rec_img.jpg
|
||||||
|
@@ -18,5 +18,5 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ENABLE_TEXT
|
#ifndef ENABLE_TEXT
|
||||||
#define ENABLE_TEXT
|
/* #undef ENABLE_TEXT */
|
||||||
#endif
|
#endif
|
||||||
|
@@ -60,6 +60,11 @@ typedef struct FD_C_OneDimArrayCstr {
|
|||||||
FD_C_Cstr* data;
|
FD_C_Cstr* data;
|
||||||
} FD_C_OneDimArrayCstr; // std::vector<std::string>
|
} FD_C_OneDimArrayCstr; // std::vector<std::string>
|
||||||
|
|
||||||
|
typedef struct FD_C_TwoDimArrayCstr {
|
||||||
|
size_t size;
|
||||||
|
FD_C_OneDimArrayCstr* data;
|
||||||
|
} FD_C_TwoDimArrayCstr; // std::vector<std::vector<std::string>>
|
||||||
|
|
||||||
typedef struct FD_C_TwoDimArraySize {
|
typedef struct FD_C_TwoDimArraySize {
|
||||||
size_t size;
|
size_t size;
|
||||||
FD_C_OneDimArraySize* data;
|
FD_C_OneDimArraySize* data;
|
||||||
@@ -134,6 +139,8 @@ DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayFloat);
|
|||||||
DECLARE_DESTROY_FD_TYPE_FUNCTION(Cstr);
|
DECLARE_DESTROY_FD_TYPE_FUNCTION(Cstr);
|
||||||
// FD_C_OneDimArrayCstr
|
// FD_C_OneDimArrayCstr
|
||||||
DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayCstr);
|
DECLARE_DESTROY_FD_TYPE_FUNCTION(OneDimArrayCstr);
|
||||||
|
// FD_C_TwoDimArrayCstr
// NOTE(review): presumably declares the destroy function for
// FD_C_TwoDimArrayCstr, like the sibling invocations here -- the macro body is
// not visible in this view.
DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArrayCstr);
|
||||||
// FD_C_TwoDimArraySize
|
// FD_C_TwoDimArraySize
|
||||||
DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArraySize);
|
DECLARE_DESTROY_FD_TYPE_FUNCTION(TwoDimArraySize);
|
||||||
// FD_C_TwoDimArrayInt8
|
// FD_C_TwoDimArrayInt8
|
||||||
|
@@ -318,6 +318,124 @@ FD_C_Bool FD_C_DBDetectorWrapperBatchPredict(
|
|||||||
return successful;
|
return successful;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StructureV2Table
|
||||||
|
// Allocates a FD_C_StructureV2TableWrapper and stores a newly constructed
// fastdeploy::vision::ocr::StructureV2Table in its `table_model` member.
//
// model_file / params_file / table_char_dict_path are converted to
// std::string and forwarded to the model constructor together with the
// unwrapped runtime option and the model format cast to
// fastdeploy::ModelFormat. Returns the heap-allocated wrapper.
FD_C_StructureV2TableWrapper* FD_C_CreateStructureV2TableWrapper(
    const char* model_file, const char* params_file,
    const char* table_char_dict_path,
    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
    const FD_C_ModelFormat model_format) {
  using TableModel = fastdeploy::vision::ocr::StructureV2Table;

  auto& option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
                                           fd_c_runtime_option_wrapper);
  auto* wrapper = new FD_C_StructureV2TableWrapper();
  const auto format = static_cast<fastdeploy::ModelFormat>(model_format);
  wrapper->table_model = std::unique_ptr<TableModel>(
      new TableModel(std::string(model_file), std::string(params_file),
                     std::string(table_char_dict_path), *option, format));
  return wrapper;
}
|
||||||
|
|
||||||
|
// NOTE(review): presumably expands to the destroy function for
// FD_C_StructureV2TableWrapper -- the macro body is outside this view.
OCR_DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(
    StructureV2Table, fd_c_structurev2table_wrapper)
|
||||||
|
|
||||||
|
// Runs the table model on one image and copies the result into C arrays.
//
// img is reinterpreted as a cv::Mat*. On success:
//   - boxes_result receives one FD_C_OneDimArrayInt32 of 8 ints per box;
//   - structure_result receives one NUL-terminated FD_C_Cstr per structure
//     string, with `size` set to the string length (terminator excluded).
// All output buffers are allocated with new[] here. On failure the outputs
// are left untouched. Returns whether Predict succeeded.
FD_C_Bool FD_C_StructureV2TableWrapperPredict(
    FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_Mat img,
    FD_C_TwoDimArrayInt32* boxes_result,
    FD_C_OneDimArrayCstr* structure_result) {
  cv::Mat* im = reinterpret_cast<cv::Mat*>(img);
  std::vector<std::array<int, 8>> boxes_result_out;
  std::vector<std::string> structures_result_out;
  auto& model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper,
                                          fd_c_structurev2table_wrapper);
  bool successful =
      model->Predict(*im, &boxes_result_out, &structures_result_out);
  if (successful) {
    // copy boxes
    const size_t boxes_coordinate_dim = 8;
    boxes_result->size = boxes_result_out.size();
    boxes_result->data = new FD_C_OneDimArrayInt32[boxes_result->size];
    for (size_t i = 0; i < boxes_result_out.size(); i++) {
      boxes_result->data[i].size = boxes_coordinate_dim;
      boxes_result->data[i].data = new int[boxes_coordinate_dim];
      for (size_t j = 0; j < boxes_coordinate_dim; j++) {
        boxes_result->data[i].data[j] = boxes_result_out[i][j];
      }
    }
    // copy structures
    structure_result->size = structures_result_out.size();
    structure_result->data = new FD_C_Cstr[structure_result->size];
    for (size_t i = 0; i < structures_result_out.size(); i++) {
      const std::string& structure = structures_result_out[i];
      structure_result->data[i].size = structure.length();
      structure_result->data[i].data = new char[structure.length() + 1];
      // Copy length + 1 bytes so the terminating '\0' from c_str() lands in
      // the buffer; copying only length() bytes leaves it unterminated.
      strncpy(structure_result->data[i].data, structure.c_str(),
              structure.length() + 1);
    }
  }
  return successful;
}
|
||||||
|
|
||||||
|
// NOTE(review): presumably expands to the "initialized" query for
// FD_C_StructureV2TableWrapper -- the macro body is outside this view.
OCR_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(
    StructureV2Table, fd_c_structurev2table_wrapper)
|
||||||
|
|
||||||
|
// Runs the table model on a batch of images and copies the results into C
// arrays.
//
// Each entry of imgs.data is reinterpreted as a cv::Mat*. On success:
//   - det_results receives, per image, a list of boxes with 8 ints each;
//   - structure_results receives, per image, a list of NUL-terminated
//     FD_C_Cstr values whose `size` is the string length.
// All output buffers are allocated with new[] here. On failure the outputs
// are left untouched. Returns whether BatchPredict succeeded.
FD_C_Bool FD_C_StructureV2TableWrapperBatchPredict(
    FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper,
    FD_C_OneDimMat imgs, FD_C_ThreeDimArrayInt32* det_results,
    FD_C_TwoDimArrayCstr* structure_results) {
  std::vector<cv::Mat> imgs_vec;
  std::vector<std::vector<std::array<int, 8>>> det_results_out;
  std::vector<std::vector<std::string>> structure_results_out;
  for (size_t i = 0; i < imgs.size; i++) {
    imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
  }
  auto& model = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper,
                                          fd_c_structurev2table_wrapper);
  bool successful =
      model->BatchPredict(imgs_vec, &det_results_out, &structure_results_out);
  if (successful) {
    // copy results back to FD_C_ThreeDimArrayInt32
    const size_t boxes_coordinate_dim = 8;
    det_results->size = det_results_out.size();
    det_results->data = new FD_C_TwoDimArrayInt32[det_results->size];
    for (size_t batch_indx = 0; batch_indx < det_results->size; batch_indx++) {
      const auto& boxes = det_results_out[batch_indx];
      FD_C_TwoDimArrayInt32& c_boxes = det_results->data[batch_indx];
      c_boxes.size = boxes.size();
      c_boxes.data = new FD_C_OneDimArrayInt32[c_boxes.size];
      for (size_t i = 0; i < boxes.size(); i++) {
        c_boxes.data[i].size = boxes_coordinate_dim;
        c_boxes.data[i].data = new int[boxes_coordinate_dim];
        for (size_t j = 0; j < boxes_coordinate_dim; j++) {
          c_boxes.data[i].data[j] = boxes[i][j];
        }
      }
    }
    // copy structures
    structure_results->size = structure_results_out.size();
    structure_results->data = new FD_C_OneDimArrayCstr[structure_results->size];
    for (size_t batch_indx = 0; batch_indx < structure_results->size;
         batch_indx++) {
      const auto& structures = structure_results_out[batch_indx];
      FD_C_OneDimArrayCstr& c_structures = structure_results->data[batch_indx];
      c_structures.size = structures.size();
      c_structures.data = new FD_C_Cstr[c_structures.size];
      for (size_t i = 0; i < structures.size(); i++) {
        c_structures.data[i].size = structures[i].length();
        c_structures.data[i].data = new char[structures[i].length() + 1];
        // Copy length + 1 bytes so the terminating '\0' from c_str() lands in
        // the buffer; copying only length() bytes leaves it unterminated.
        strncpy(c_structures.data[i].data, structures[i].c_str(),
                structures[i].length() + 1);
      }
    }
  }
  return successful;
}
|
||||||
|
|
||||||
// PPOCRv2
|
// PPOCRv2
|
||||||
|
|
||||||
FD_C_PPOCRv2Wrapper* FD_C_CreatePPOCRv2Wrapper(
|
FD_C_PPOCRv2Wrapper* FD_C_CreatePPOCRv2Wrapper(
|
||||||
@@ -466,6 +584,82 @@ FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict(
|
|||||||
return successful;
|
return successful;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PPStructureV2Table
|
||||||
|
|
||||||
|
// Allocates a FD_C_PPStructureV2TableWrapper and stores a new
// fastdeploy::pipeline::PPStructureV2Table built from the detector,
// recognizer and table models held by the three given wrappers.
//
// The pipeline is constructed from raw pointers obtained with .get().
// NOTE(review): presumably the three model wrappers must outlive the returned
// pipeline wrapper -- confirm against PPStructureV2Table.
FD_C_PPStructureV2TableWrapper* FD_C_CreatePPStructureV2TableWrapper(
    FD_C_DBDetectorWrapper* fd_c_det_model_wrapper,
    FD_C_RecognizerWrapper* fd_c_rec_model_wrapper,
    FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper) {
  using Pipeline = fastdeploy::pipeline::PPStructureV2Table;

  auto* pipeline_wrapper = new FD_C_PPStructureV2TableWrapper();
  auto& detector =
      CHECK_AND_CONVERT_FD_TYPE(DBDetectorWrapper, fd_c_det_model_wrapper);
  auto& recognizer =
      CHECK_AND_CONVERT_FD_TYPE(RecognizerWrapper, fd_c_rec_model_wrapper);
  auto& table = CHECK_AND_CONVERT_FD_TYPE(StructureV2TableWrapper,
                                          fd_c_structurev2table_wrapper);
  pipeline_wrapper->ppstructurev2table_model = std::unique_ptr<Pipeline>(
      new Pipeline(detector.get(), recognizer.get(), table.get()));
  return pipeline_wrapper;
}
|
||||||
|
|
||||||
|
// NOTE(review): presumably expands to the destroy function for
// FD_C_PPStructureV2TableWrapper -- the macro body is outside this view.
PIPELINE_DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(
    PPStructureV2Table, fd_c_ppstructurev2table_wrapper)
|
||||||
|
|
||||||
|
// Runs the PPStructureV2Table pipeline on one image.
//
// img is reinterpreted as a cv::Mat*. A temporary OCR result wrapper receives
// the prediction; on success it is converted into fd_c_ocr_result. The
// temporary wrapper is destroyed on both paths. Returns whether Predict
// succeeded.
FD_C_Bool FD_C_PPStructureV2TableWrapperPredict(
    FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper,
    FD_C_Mat img, FD_C_OCRResult* fd_c_ocr_result) {
  cv::Mat* image = reinterpret_cast<cv::Mat*>(img);
  auto& pipeline = CHECK_AND_CONVERT_FD_TYPE(PPStructureV2TableWrapper,
                                             fd_c_ppstructurev2table_wrapper);

  // Scratch wrapper that holds the C++ result until it has been converted.
  FD_C_OCRResultWrapper* scratch = FD_C_CreateOCRResultWrapper();
  auto& cpp_result = CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, scratch);

  const bool ok = pipeline->Predict(image, cpp_result.get());
  if (ok) {
    FD_C_OCRResultWrapperToCResult(scratch, fd_c_ocr_result);
  }
  FD_C_DestroyOCRResultWrapper(scratch);
  return ok;
}
|
||||||
|
|
||||||
|
// NOTE(review): presumably expands to the "initialized" query for
// FD_C_PPStructureV2TableWrapper -- the macro body is outside this view.
PIPELINE_DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(
    PPStructureV2Table, fd_c_ppstructurev2table_wrapper)
|
||||||
|
|
||||||
|
// Runs the PPStructureV2Table pipeline on a batch of images.
//
// Each entry of imgs.data is reinterpreted as a cv::Mat*. On success
// `results` receives one FD_C_OCRResult per pipeline result (array allocated
// with new[] here). On failure `results` is left untouched. Returns whether
// BatchPredict succeeded.
//
// A temporary OCR result wrapper is created and destroyed per result inside
// the conversion loop, so none are leaked when prediction fails or when the
// number of results differs from the number of input images.
FD_C_Bool FD_C_PPStructureV2TableWrapperBatchPredict(
    FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper,
    FD_C_OneDimMat imgs, FD_C_OneDimOCRResult* results) {
  std::vector<cv::Mat> imgs_vec;
  std::vector<fastdeploy::vision::OCRResult> results_out;
  for (size_t i = 0; i < imgs.size; i++) {
    imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
  }
  auto& model = CHECK_AND_CONVERT_FD_TYPE(PPStructureV2TableWrapper,
                                          fd_c_ppstructurev2table_wrapper);
  bool successful = model->BatchPredict(imgs_vec, &results_out);
  if (successful) {
    // copy results back to FD_C_OneDimOCRResult
    results->size = results_out.size();
    results->data = new FD_C_OCRResult[results->size];
    for (size_t i = 0; i < results_out.size(); i++) {
      FD_C_OCRResultWrapper* result_wrapper = FD_C_CreateOCRResultWrapper();
      (*CHECK_AND_CONVERT_FD_TYPE(OCRResultWrapper, result_wrapper)) =
          std::move(results_out[i]);
      FD_C_OCRResultWrapperToCResult(result_wrapper, &results->data[i]);
      FD_C_DestroyOCRResultWrapper(result_wrapper);
    }
  }
  return successful;
}
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
@@ -225,6 +225,68 @@ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_DBDetectorWrapperBatchPredict(
|
|||||||
FD_C_ThreeDimArrayInt32* det_results);
|
FD_C_ThreeDimArrayInt32* det_results);
|
||||||
|
|
||||||
|
|
||||||
|
// StructureV2Table
|
||||||
|
|
||||||
|
typedef struct FD_C_StructureV2TableWrapper FD_C_StructureV2TableWrapper;
|
||||||
|
|
||||||
|
/** \brief Create a new FD_C_StructureV2TableWrapper object
 *
 * \param[in] model_file Path of model file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel.
 * \param[in] params_file Path of parameter file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
 * \param[in] table_char_dict_path Path of the table character dict file, e.g ./table_structure_dict.txt.
 * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
 * \param[in] model_format Model format of the loaded model, default is Paddle format.
 *
 * \return Return a pointer to FD_C_StructureV2TableWrapper object
 */
|
||||||
|
|
||||||
|
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_StructureV2TableWrapper*
|
||||||
|
FD_C_CreateStructureV2TableWrapper(
|
||||||
|
const char* model_file, const char* params_file, const char* table_char_dict_path,
|
||||||
|
FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
|
||||||
|
const FD_C_ModelFormat model_format);
|
||||||
|
|
||||||
|
/** \brief Destroy a FD_C_StructureV2TableWrapper object
 *
 * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
 */
|
||||||
|
|
||||||
|
OCR_DECLARE_DESTROY_WRAPPER_FUNCTION(StructureV2Table, fd_c_structurev2table_wrapper);
|
||||||
|
|
||||||
|
/** \brief Predict the input image and get OCR table model result.
 *
 * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
 * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
 * \param[out] boxes_result The boxes of the OCR table model result will be written to this structure, 8 coordinates per box.
 * \param[out] structure_result The structure strings of the OCR table model result will be written to this structure.
 * \return true if the prediction succeeded, otherwise false.
 */
|
||||||
|
|
||||||
|
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_StructureV2TableWrapperPredict(
|
||||||
|
__fd_keep FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_Mat img,
|
||||||
|
FD_C_TwoDimArrayInt32* boxes_result, FD_C_OneDimArrayCstr* structure_result);
|
||||||
|
|
||||||
|
/** \brief Check if the model is initialized successfully
 *
 * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
 *
 * \return Return a bool of value true if initialized successfully
 */
|
||||||
|
|
||||||
|
OCR_DECLARE_INITIALIZED_FUNCTION(StructureV2Table, fd_c_structurev2table_wrapper);
|
||||||
|
|
||||||
|
/** \brief BatchPredict the input images and get OCR table model results.
 *
 * \param[in] fd_c_structurev2table_wrapper pointer to FD_C_StructureV2TableWrapper object
 * \param[in] imgs The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
 * \param[out] det_results The boxes of the OCR table model results will be written to this structure, one list of boxes per image.
 * \param[out] structure_results The structure strings of the OCR table model results will be written to this structure, one list of strings per image.
 *
 * \return true if the prediction succeeded, otherwise false.
 */
|
||||||
|
|
||||||
|
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_StructureV2TableWrapperBatchPredict(
|
||||||
|
__fd_keep FD_C_StructureV2TableWrapper* fd_c_structurev2table_wrapper, FD_C_OneDimMat imgs,
|
||||||
|
FD_C_ThreeDimArrayInt32* det_results, FD_C_TwoDimArrayCstr* structure_results);
|
||||||
|
|
||||||
|
|
||||||
// PPOCRv2
|
// PPOCRv2
|
||||||
|
|
||||||
|
|
||||||
@@ -343,6 +405,63 @@ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPOCRv3WrapperBatchPredict(
|
|||||||
FD_C_OneDimOCRResult* batch_result);
|
FD_C_OneDimOCRResult* batch_result);
|
||||||
|
|
||||||
|
|
||||||
|
// PPStructureV2Table
|
||||||
|
|
||||||
|
typedef struct FD_C_PPStructureV2TableWrapper FD_C_PPStructureV2TableWrapper;
|
||||||
|
|
||||||
|
/** \brief Create a PPStructureV2Table pipeline from the detection model, recognition model and table recognition model.
|
||||||
|
*
|
||||||
|
* \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv3_det_infer
|
||||||
|
* \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv3_rec_infer
|
||||||
|
* \param[in] table_model Path of table model, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer
|
||||||
|
*/
|
||||||
|
|
||||||
|
FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PPStructureV2TableWrapper*
|
||||||
|
FD_C_CreatePPStructureV2TableWrapper(
|
||||||
|
FD_C_DBDetectorWrapper* det_model,
|
||||||
|
FD_C_RecognizerWrapper* rec_model,
|
||||||
|
FD_C_StructureV2TableWrapper* table_model);
|
||||||
|
|
||||||
|
/** \brief Destroy a FD_C_PPStructureV2TableWrapper object
|
||||||
|
*
|
||||||
|
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
|
||||||
|
*/
|
||||||
|
|
||||||
|
OCR_DECLARE_DESTROY_WRAPPER_FUNCTION(PPStructureV2Table, fd_c_ppstructurev2table_wrapper);
|
||||||
|
|
||||||
|
/** \brief Predict the input image and get OCR result.
|
||||||
|
*
|
||||||
|
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
|
||||||
|
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||||
|
* \param[in] result The output OCR result will be written to this structure.
|
||||||
|
* \return true if the prediction succeeded, otherwise false.
|
||||||
|
*/
|
||||||
|
|
||||||
|
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPStructureV2TableWrapperPredict(
|
||||||
|
__fd_keep FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, FD_C_Mat img,
|
||||||
|
FD_C_OCRResult* result);
|
||||||
|
|
||||||
|
/** \brief Check if the model is initialized successfully
|
||||||
|
*
|
||||||
|
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
|
||||||
|
*
|
||||||
|
* \return Return a bool of value true if initialized successfully
|
||||||
|
*/
|
||||||
|
|
||||||
|
OCR_DECLARE_INITIALIZED_FUNCTION(PPStructureV2Table, fd_c_ppstructurev2table_wrapper);
|
||||||
|
|
||||||
|
/** \brief BatchPredict the input image and get OCR result.
|
||||||
|
*
|
||||||
|
* \param[in] fd_c_ppstructurev2table_wrapper pointer to FD_C_PPStructureV2TableWrapper object
|
||||||
|
* \param[in] imgs The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||||
|
* \param[in] batch_result The output list of OCR result will be written to this structure.
|
||||||
|
* \return true if the prediction succeeded, otherwise false.
|
||||||
|
*/
|
||||||
|
|
||||||
|
FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPStructureV2TableWrapperBatchPredict(
|
||||||
|
__fd_keep FD_C_PPStructureV2TableWrapper* fd_c_ppstructurev2table_wrapper, FD_C_OneDimMat imgs,
|
||||||
|
FD_C_OneDimOCRResult* batch_result);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} // extern "C"
|
} // extern "C"
|
||||||
#endif
|
#endif
|
||||||
|
@@ -70,6 +70,9 @@ typedef struct FD_C_OCRResult {
|
|||||||
FD_C_OneDimArrayFloat rec_scores;
|
FD_C_OneDimArrayFloat rec_scores;
|
||||||
FD_C_OneDimArrayFloat cls_scores;
|
FD_C_OneDimArrayFloat cls_scores;
|
||||||
FD_C_OneDimArrayInt32 cls_labels;
|
FD_C_OneDimArrayInt32 cls_labels;
|
||||||
|
FD_C_TwoDimArrayInt32 table_boxes;
|
||||||
|
FD_C_OneDimArrayCstr table_structure;
|
||||||
|
FD_C_Cstr table_html;
|
||||||
FD_C_ResultType type;
|
FD_C_ResultType type;
|
||||||
} FD_C_OCRResult;
|
} FD_C_OCRResult;
|
||||||
|
|
||||||
|
@@ -166,6 +166,10 @@ DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
|||||||
DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
||||||
Classifier, fd_classifier_wrapper, classifier_model);
|
Classifier, fd_classifier_wrapper, classifier_model);
|
||||||
|
|
||||||
|
// Table
|
||||||
|
DECL_AND_IMPLEMENT_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
||||||
|
StructureV2Table, fd_structurev2_table_wrapper, table_model);
|
||||||
|
|
||||||
// PPOCRv2
|
// PPOCRv2
|
||||||
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
||||||
PPOCRv2, fd_ppocrv2_wrapper, ppocrv2_model);
|
PPOCRv2, fd_ppocrv2_wrapper, ppocrv2_model);
|
||||||
@@ -174,6 +178,11 @@ DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
|||||||
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
||||||
PPOCRv3, fd_ppocrv3_wrapper, ppocrv3_model);
|
PPOCRv3, fd_ppocrv3_wrapper, ppocrv3_model);
|
||||||
|
|
||||||
|
// PPStructureV2Table
|
||||||
|
DECL_AND_IMPLEMENT_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(
|
||||||
|
PPStructureV2Table, fd_ppstructurev2_table_wrapper,
|
||||||
|
ppstructurev2table_model);
|
||||||
|
|
||||||
// Segmentation models
|
// Segmentation models
|
||||||
|
|
||||||
// PaddleSegModel
|
// PaddleSegModel
|
||||||
|
@@ -29,8 +29,10 @@
|
|||||||
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
|
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
|
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
|
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
|
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
|
||||||
#include "fastdeploy/vision/segmentation/ppseg/model.h"
|
#include "fastdeploy/vision/segmentation/ppseg/model.h"
|
||||||
|
|
||||||
#define DEFINE_RESULT_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
|
#define DEFINE_RESULT_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \
|
||||||
@@ -176,12 +178,18 @@ DEFINE_OCR_MODEL_WRAPPER_STRUCT(DBDetector, dbdetector_model);
|
|||||||
// Classifier
|
// Classifier
|
||||||
DEFINE_OCR_MODEL_WRAPPER_STRUCT(Classifier, classifier_model);
|
DEFINE_OCR_MODEL_WRAPPER_STRUCT(Classifier, classifier_model);
|
||||||
|
|
||||||
|
// StructureV2Table
|
||||||
|
DEFINE_OCR_MODEL_WRAPPER_STRUCT(StructureV2Table, table_model);
|
||||||
|
|
||||||
// PPOCRv2
|
// PPOCRv2
|
||||||
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model);
|
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model);
|
||||||
|
|
||||||
// PPOCRv3
|
// PPOCRv3
|
||||||
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model);
|
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model);
|
||||||
|
|
||||||
|
// PPStructureV2Table
|
||||||
|
DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPStructureV2Table, ppstructurev2table_model);
|
||||||
|
|
||||||
// Segmentation models
|
// Segmentation models
|
||||||
|
|
||||||
// PaddleSegModel
|
// PaddleSegModel
|
||||||
@@ -383,12 +391,18 @@ DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(DBDetector, fd_dbdetector_wrappe
|
|||||||
// Classifier
|
// Classifier
|
||||||
DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(Classifier, fd_classifier_wrapper);
|
DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(Classifier, fd_classifier_wrapper);
|
||||||
|
|
||||||
|
// Table
|
||||||
|
DECLARE_OCR_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(StructureV2Table, fd_structurev2_table_wrapper);
|
||||||
|
|
||||||
// PPOCRv2
|
// PPOCRv2
|
||||||
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper);
|
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper);
|
||||||
|
|
||||||
// PPOCRv3
|
// PPOCRv3
|
||||||
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper);
|
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper);
|
||||||
|
|
||||||
|
// PPStructureV2Table
|
||||||
|
DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPStructureV2Table, fd_ppstructurev2_table_wrapper);
|
||||||
|
|
||||||
// Segmentation models
|
// Segmentation models
|
||||||
|
|
||||||
// PaddleSegModel
|
// PaddleSegModel
|
||||||
|
@@ -14,6 +14,11 @@ add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
|
|||||||
# 添加FastDeploy库依赖
|
# 添加FastDeploy库依赖
|
||||||
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
|
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
|
||||||
|
|
||||||
|
# PPStructure-V2-Table pipeline demo (detection + recognition + table model)
add_executable(
  infer_ppstructurev2_table
  "${PROJECT_SOURCE_DIR}/infer_ppstructurev2_table.cc"
)
# Link the demo against the FastDeploy libraries
target_link_libraries(infer_ppstructurev2_table ${FASTDEPLOY_LIBS})
|
||||||
|
|
||||||
# Only Det
|
# Only Det
|
||||||
add_executable(infer_det ${PROJECT_SOURCE_DIR}/infer_det.cc)
|
add_executable(infer_det ${PROJECT_SOURCE_DIR}/infer_det.cc)
|
||||||
# 添加FastDeploy库依赖
|
# 添加FastDeploy库依赖
|
||||||
@@ -28,3 +33,8 @@ target_link_libraries(infer_cls ${FASTDEPLOY_LIBS})
|
|||||||
add_executable(infer_rec ${PROJECT_SOURCE_DIR}/infer_rec.cc)
|
add_executable(infer_rec ${PROJECT_SOURCE_DIR}/infer_rec.cc)
|
||||||
# 添加FastDeploy库依赖
|
# 添加FastDeploy库依赖
|
||||||
target_link_libraries(infer_rec ${FASTDEPLOY_LIBS})
|
target_link_libraries(infer_rec ${FASTDEPLOY_LIBS})
|
||||||
|
|
||||||
|
# Table model only
add_executable(
  infer_structurev2_table
  "${PROJECT_SOURCE_DIR}/infer_structurev2_table.cc"
)
# Link the demo against the FastDeploy libraries
target_link_libraries(infer_structurev2_table ${FASTDEPLOY_LIBS})
|
||||||
|
@@ -43,10 +43,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
|
|||||||
# 下载PP-OCRv3文字识别模型
|
# 下载PP-OCRv3文字识别模型
|
||||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
||||||
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
||||||
|
# 下载PPStructureV2表格识别模型
|
||||||
|
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||||
|
tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||||
|
|
||||||
# 下载预测图片与字典文件
|
# 下载预测图片与字典文件
|
||||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
|
||||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt
|
||||||
|
|
||||||
# 运行部署示例
|
# 运行部署示例
|
||||||
# 在CPU上使用Paddle Inference推理
|
# 在CPU上使用Paddle Inference推理
|
||||||
@@ -77,6 +82,9 @@ wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_
|
|||||||
|
|
||||||
# 在CPU上,单独使用文字识别模型部署
|
# 在CPU上,单独使用文字识别模型部署
|
||||||
./infer_rec ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
|
./infer_rec ./ch_PP-OCRv3_rec_infer ./ppocr_keys_v1.txt ./12.jpg 0
|
||||||
|
|
||||||
|
# 在CPU上,单独使用表格识别模型部署
|
||||||
|
./infer_structurev2_table ./ch_ppstructure_mobile_v2.0_SLANet_infer ./table.jpg ./table_structure_dict_ch.txt 0
|
||||||
```
|
```
|
||||||
|
|
||||||
运行完成可视化结果如下图所示
|
运行完成可视化结果如下图所示
|
||||||
|
177
examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_ppstructurev2_table.cc
Executable file
177
examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_ppstructurev2_table.cc
Executable file
@@ -0,0 +1,177 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision.h"
|
||||||
|
#ifdef WIN32
|
||||||
|
const char sep = '\\';
|
||||||
|
#else
|
||||||
|
const char sep = '/';
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void InitAndInfer(const std::string &det_model_dir,
|
||||||
|
const std::string &rec_model_dir,
|
||||||
|
const std::string &table_model_dir,
|
||||||
|
const std::string &rec_label_file,
|
||||||
|
const std::string &table_char_dict_path,
|
||||||
|
const std::string &image_file,
|
||||||
|
const fastdeploy::RuntimeOption &option) {
|
||||||
|
auto det_model_file = det_model_dir + sep + "inference.pdmodel";
|
||||||
|
auto det_params_file = det_model_dir + sep + "inference.pdiparams";
|
||||||
|
|
||||||
|
auto rec_model_file = rec_model_dir + sep + "inference.pdmodel";
|
||||||
|
auto rec_params_file = rec_model_dir + sep + "inference.pdiparams";
|
||||||
|
|
||||||
|
auto table_model_file = table_model_dir + sep + "inference.pdmodel";
|
||||||
|
auto table_params_file = table_model_dir + sep + "inference.pdiparams";
|
||||||
|
|
||||||
|
auto det_option = option;
|
||||||
|
auto rec_option = option;
|
||||||
|
auto table_option = option;
|
||||||
|
|
||||||
|
// The rec model can inference a batch of images now.
|
||||||
|
// User could initialize the inference batch size and set them after create
|
||||||
|
// PP-OCR model.
|
||||||
|
int rec_batch_size = 1;
|
||||||
|
|
||||||
|
// If use TRT backend, the dynamic shape will be set as follow.
|
||||||
|
// We recommend that users set the length and height of the detection model to
|
||||||
|
// a multiple of 32.
|
||||||
|
// We also recommend that users set the Trt input shape as follow.
|
||||||
|
det_option.SetTrtInputShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
|
||||||
|
{1, 3, 960, 960});
|
||||||
|
rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
|
||||||
|
{rec_batch_size, 3, 48, 2304});
|
||||||
|
table_option.SetTrtInputShape("x", {1, 3, 488, 488}, {1, 3, 488, 488},
|
||||||
|
{1, 3, 488, 488});
|
||||||
|
|
||||||
|
// Users could save TRT cache file to disk as follow.
|
||||||
|
det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
|
||||||
|
rec_option.SetTrtCacheFile(rec_model_dir + sep + "rec_trt_cache.trt");
|
||||||
|
table_option.SetTrtCacheFile(table_model_dir + sep + "table_trt_cache.trt");
|
||||||
|
|
||||||
|
auto det_model = fastdeploy::vision::ocr::DBDetector(
|
||||||
|
det_model_file, det_params_file, det_option);
|
||||||
|
auto rec_model = fastdeploy::vision::ocr::Recognizer(
|
||||||
|
rec_model_file, rec_params_file, rec_label_file, rec_option);
|
||||||
|
auto table_model = fastdeploy::vision::ocr::StructureV2Table(
|
||||||
|
table_model_file, table_params_file, table_char_dict_path, table_option);
|
||||||
|
|
||||||
|
assert(det_model.Initialized());
|
||||||
|
assert(rec_model.Initialized());
|
||||||
|
assert(table_model.Initialized());
|
||||||
|
|
||||||
|
// Parameters settings for pre and post processing of Det/Cls/Rec Models.
|
||||||
|
// All parameters are set to default values.
|
||||||
|
det_model.GetPreprocessor().SetMaxSideLen(960);
|
||||||
|
det_model.GetPostprocessor().SetDetDBThresh(0.3);
|
||||||
|
det_model.GetPostprocessor().SetDetDBBoxThresh(0.6);
|
||||||
|
det_model.GetPostprocessor().SetDetDBUnclipRatio(1.5);
|
||||||
|
det_model.GetPostprocessor().SetDetDBScoreMode("slow");
|
||||||
|
det_model.GetPostprocessor().SetUseDilation(0);
|
||||||
|
|
||||||
|
rec_model.GetPreprocessor().SetStaticShapeInfer(true);
|
||||||
|
rec_model.GetPreprocessor().SetRecImageShape({3, 48, 320});
|
||||||
|
|
||||||
|
// The classification model is optional, so the PP-OCR can also be connected
|
||||||
|
// in series as follows
|
||||||
|
auto ppstructurev2_table = fastdeploy::pipeline::PPStructureV2Table(
|
||||||
|
&det_model, &rec_model, &table_model);
|
||||||
|
|
||||||
|
// Set inference batch size for cls model and rec model, the value could be -1
|
||||||
|
// and 1 to positive infinity.
|
||||||
|
// When inference batch size is set to -1, it means that the inference batch
|
||||||
|
// size of the rec models will be the same as the number of boxes detected
|
||||||
|
// by the det model.
|
||||||
|
ppstructurev2_table.SetRecBatchSize(rec_batch_size);
|
||||||
|
|
||||||
|
if (!ppstructurev2_table.Initialized()) {
|
||||||
|
std::cerr << "Failed to initialize PP-OCR-Table." << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto im = cv::imread(image_file);
|
||||||
|
auto im_bak = im.clone();
|
||||||
|
|
||||||
|
fastdeploy::vision::OCRResult result;
|
||||||
|
if (!ppstructurev2_table.Predict(&im, &result)) {
|
||||||
|
std::cerr << "Failed to predict." << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << result.Str() << std::endl;
|
||||||
|
|
||||||
|
auto vis_im = fastdeploy::vision::VisOcr(im_bak, result);
|
||||||
|
cv::imwrite("vis_result.jpg", vis_im);
|
||||||
|
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
if (argc < 8) {
|
||||||
|
std::cout << "Usage: infer_ppstructurev2_table path/to/det_model "
|
||||||
|
"path/to/rec_model "
|
||||||
|
"path/to/table_model path/to/rec_label_file "
|
||||||
|
"path/to/table_char_dict_path path/to/image "
|
||||||
|
"run_option, "
|
||||||
|
"e.g ./infer_ppstructurev2_table ./ch_PP-OCRv3_det_infer "
|
||||||
|
"./ch_ppocr_mobile_v2.0_cls_infer ./ch_PP-OCRv3_rec_infer "
|
||||||
|
"./ppocr_keys_v1.txt ./12.jpg 0"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "The data type of run_option is int, e.g. 0: run with paddle "
|
||||||
|
"inference on cpu;"
|
||||||
|
<< std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fastdeploy::RuntimeOption option;
|
||||||
|
int flag = std::atoi(argv[7]);
|
||||||
|
std::cout << "flag: " << flag << std::endl;
|
||||||
|
|
||||||
|
if (flag == 0) {
|
||||||
|
option.UseCpu();
|
||||||
|
option.UsePaddleBackend(); // Paddle Inference
|
||||||
|
} else if (flag == 1) {
|
||||||
|
option.UseCpu();
|
||||||
|
option.UseOpenVINOBackend(); // OpenVINO
|
||||||
|
} else if (flag == 2) {
|
||||||
|
option.UseCpu();
|
||||||
|
option.UseOrtBackend(); // ONNX Runtime
|
||||||
|
} else if (flag == 3) {
|
||||||
|
option.UseCpu();
|
||||||
|
option.UseLiteBackend(); // Paddle Lite
|
||||||
|
} else if (flag == 4) {
|
||||||
|
option.UseGpu();
|
||||||
|
option.UsePaddleBackend(); // Paddle Inference
|
||||||
|
} else if (flag == 5) {
|
||||||
|
option.UseGpu();
|
||||||
|
option.UsePaddleInferBackend();
|
||||||
|
option.paddle_infer_option.collect_trt_shape = true;
|
||||||
|
option.paddle_infer_option.enable_trt = true; // Paddle-TensorRT
|
||||||
|
} else if (flag == 6) {
|
||||||
|
option.UseGpu();
|
||||||
|
option.UseOrtBackend(); // ONNX Runtime
|
||||||
|
} else if (flag == 7) {
|
||||||
|
option.UseGpu();
|
||||||
|
option.UseTrtBackend(); // TensorRT
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string det_model_dir = argv[1];
|
||||||
|
std::string rec_model_dir = argv[2];
|
||||||
|
std::string table_model_dir = argv[3];
|
||||||
|
std::string rec_label_file = argv[4];
|
||||||
|
std::string table_char_dict_path = argv[5];
|
||||||
|
std::string test_image = argv[6];
|
||||||
|
InitAndInfer(det_model_dir, rec_model_dir, table_model_dir, rec_label_file,
|
||||||
|
table_char_dict_path, test_image, option);
|
||||||
|
return 0;
|
||||||
|
}
|
74
examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_structurev2_table.cc
Executable file
74
examples/vision/ocr/PP-OCR/cpu-gpu/cpp/infer_structurev2_table.cc
Executable file
@@ -0,0 +1,74 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision.h"
|
||||||
|
#ifdef WIN32
|
||||||
|
const char sep = '\\';
|
||||||
|
#else
|
||||||
|
const char sep = '/';
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void InitAndInfer(const std::string &table_model_dir,
|
||||||
|
const std::string &image_file,
|
||||||
|
const std::string &table_char_dict_path,
|
||||||
|
const fastdeploy::RuntimeOption &option) {
|
||||||
|
auto table_model_file = table_model_dir + sep + "inference.pdmodel";
|
||||||
|
auto table_params_file = table_model_dir + sep + "inference.pdiparams";
|
||||||
|
auto table_option = option;
|
||||||
|
|
||||||
|
auto table_model = fastdeploy::vision::ocr::StructureV2Table(
|
||||||
|
table_model_file, table_params_file, table_char_dict_path, table_option);
|
||||||
|
assert(table_model.Initialized());
|
||||||
|
|
||||||
|
auto im = cv::imread(image_file);
|
||||||
|
auto im_bak = im.clone();
|
||||||
|
|
||||||
|
fastdeploy::vision::OCRResult result;
|
||||||
|
if (!table_model.Predict(im, &result)) {
|
||||||
|
std::cerr << "Failed to predict." << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << result.Str() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
if (argc < 5) {
|
||||||
|
std::cout << "Usage: infer_demo path/to/table_model path/to/image "
|
||||||
|
"path/to/table_dict_path"
|
||||||
|
"run_option, "
|
||||||
|
"e.g ./infer_structurev2_table ch_ppocr_mobile_v2.0_cls_infer "
|
||||||
|
"table.jpg table_structure_dict.txt 0"
|
||||||
|
<< std::endl;
|
||||||
|
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
|
||||||
|
"with gpu;."
|
||||||
|
<< std::endl;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fastdeploy::RuntimeOption option;
|
||||||
|
int flag = std::atoi(argv[4]);
|
||||||
|
|
||||||
|
if (flag == 0) {
|
||||||
|
option.UseCpu();
|
||||||
|
} else if (flag == 1) {
|
||||||
|
option.UseGpu();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string table_model_dir = argv[1];
|
||||||
|
std::string test_image = argv[2];
|
||||||
|
std::string table_char_dict_path = argv[3];
|
||||||
|
InitAndInfer(table_model_dir, test_image, table_char_dict_path, option);
|
||||||
|
return 0;
|
||||||
|
}
|
@@ -36,10 +36,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
|
|||||||
# 下载PP-OCRv3文字识别模型
|
# 下载PP-OCRv3文字识别模型
|
||||||
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
|
||||||
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
tar -xvf ch_PP-OCRv3_rec_infer.tar
|
||||||
|
# 下载PPStructureV2表格识别模型
|
||||||
|
wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||||
|
tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar
|
||||||
|
|
||||||
# 下载预测图片与字典文件
|
# 下载预测图片与字典文件
|
||||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
|
||||||
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
|
||||||
|
wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt
|
||||||
|
|
||||||
# 运行部署示例
|
# 运行部署示例
|
||||||
# 在CPU上使用Paddle Inference推理
|
# 在CPU上使用Paddle Inference推理
|
||||||
@@ -71,6 +76,8 @@ python infer_cls.py --cls_model ch_ppocr_mobile_v2.0_cls_infer --image 12.jpg --
|
|||||||
# 在CPU上,单独使用文字识别模型部署
|
# 在CPU上,单独使用文字识别模型部署
|
||||||
python infer_rec.py --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu
|
python infer_rec.py --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu
|
||||||
|
|
||||||
|
# 在CPU上,单独使用表格识别模型部署
|
||||||
|
python infer_structurev2_table.py --table_model ./ch_ppstructure_mobile_v2.0_SLANet_infer --table_char_dict_path ./table_structure_dict_ch.txt --image table.jpg --device cpu
|
||||||
```
|
```
|
||||||
|
|
||||||
运行完成可视化结果如下图所示
|
运行完成可视化结果如下图所示
|
||||||
|
175
examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py
Executable file
175
examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py
Executable file
@@ -0,0 +1,175 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import fastdeploy as fd
|
||||||
|
import cv2
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments():
    """Parse the command-line options of the PPStructureV2Table pipeline demo.

    Reads ``sys.argv`` through ``argparse``.

    Returns:
        argparse.Namespace with the attributes ``det_model``, ``rec_model``,
        ``table_model``, ``rec_label_file``, ``table_char_dict_path``,
        ``rec_bs``, ``image``, ``device``, ``device_id`` and ``backend``.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--det_model", required=True, help="Path of Detection model of PPOCR.")
    parser.add_argument(
        "--rec_model",
        required=True,
        help="Path of Recognization model of PPOCR.")
    parser.add_argument(
        "--table_model",
        required=True,
        help="Path of Table recognition model of PPOCR.")
    # This option is the label file, not the model directory; the help text
    # used to repeat the --rec_model description.
    parser.add_argument(
        "--rec_label_file",
        required=True,
        help="Path of the label file of the Recognition model of PPOCR.")
    parser.add_argument(
        "--table_char_dict_path",
        type=str,
        required=True,
        help="tabel recognition dict path.")
    parser.add_argument(
        "--rec_bs",
        type=int,
        default=6,
        help="Recognition model inference batch size")
    parser.add_argument(
        "--image", type=str, required=True, help="Path of test image file.")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        help="Type of inference device, support 'cpu' or 'gpu'.")
    parser.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="Define which GPU card used to run model.")
    parser.add_argument(
        "--backend",
        type=str,
        default="default",
        help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
    )

    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def build_option(args):
    """Create the runtime options of the det, rec and table models.

    Args:
        args: parsed command-line namespace; ``device``, ``device_id``,
            ``backend``, ``rec_bs``, ``det_model``, ``rec_model`` and
            ``table_model`` are read.

    Returns:
        Tuple ``(det_option, rec_option, table_option)``.

    Raises:
        AssertionError: if the trt backend is requested without the gpu
            device, or the openvino backend without the cpu device.
    """
    det_option = fd.RuntimeOption()
    rec_option = fd.RuntimeOption()
    table_option = fd.RuntimeOption()
    # Same order as the returned tuple; used to apply shared settings.
    all_options = (det_option, rec_option, table_option)

    device = args.device.lower()
    if device == "gpu":
        for opt in all_options:
            opt.use_gpu(args.device_id)

    backend = args.backend.lower()
    if backend == "trt":
        assert device == "gpu", "TensorRT backend require inference on device GPU."
        for opt in all_options:
            opt.use_trt_backend()

        # TRT input shapes for the "x" input of each model.
        # We recommend that users set the length and height of the detection
        # model to a multiple of 32.
        det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640],
                                       [1, 3, 960, 960])
        rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
                                       [args.rec_bs, 3, 48, 320],
                                       [args.rec_bs, 3, 48, 2304])
        table_option.set_trt_input_shape("x", [1, 3, 488, 488])

        # The TRT cache files are stored inside the model directories.
        det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt")
        rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
        table_option.set_trt_cache_file(args.table_model +
                                        "/table_trt_cache.trt")
    elif backend == "ort":
        for opt in all_options:
            opt.use_ort_backend()
    elif backend == "paddle":
        for opt in all_options:
            opt.use_paddle_infer_backend()
    elif backend == "openvino":
        assert device == "cpu", "OpenVINO backend require inference on device CPU."
        for opt in all_options:
            opt.use_openvino_backend()

    return det_option, rec_option, table_option
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: build the three models, chain them into a PPStructureV2Table
# pipeline, run it on the input image and save a visualization.
args = parse_arguments()

det_model_file = os.path.join(args.det_model, "inference.pdmodel")
det_params_file = os.path.join(args.det_model, "inference.pdiparams")

rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
rec_label_file = args.rec_label_file

table_model_file = os.path.join(args.table_model, "inference.pdmodel")
table_params_file = os.path.join(args.table_model, "inference.pdiparams")
table_char_dict_path = args.table_char_dict_path

# Set the runtime option
det_option, rec_option, table_option = build_option(args)

det_model = fd.vision.ocr.DBDetector(
    det_model_file, det_params_file, runtime_option=det_option)

rec_model = fd.vision.ocr.Recognizer(
    rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option)

table_model = fd.vision.ocr.StructureV2Table(
    table_model_file,
    table_params_file,
    table_char_dict_path,
    runtime_option=table_option)

# Pre/post-processing parameters of the detection model.
det_model.preprocessor.max_side_len = 960
det_model.postprocessor.det_db_thresh = 0.3
det_model.postprocessor.det_db_box_thresh = 0.6
det_model.postprocessor.det_db_unclip_ratio = 1.5
det_model.postprocessor.det_db_score_mode = "slow"
det_model.postprocessor.use_dilation = False

ppstructurev2_table = fd.vision.ocr.PPStructureV2Table(
    det_model=det_model, rec_model=rec_model, table_model=table_model)

ppstructurev2_table.rec_batch_size = args.rec_bs

# Read the input image
im = cv2.imread(args.image)
# cv2.imread returns None when the file is missing or cannot be decoded.
if im is None:
    raise RuntimeError("Failed to read image: " + args.image)

# Predict and return the results
result = ppstructurev2_table.predict(im)

print(result)

# Visualize the results.
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")
|
77
examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py
Executable file
77
examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py
Executable file
@@ -0,0 +1,77 @@
|
|||||||
|
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import fastdeploy as fd
|
||||||
|
import cv2
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments():
    """Parse the command-line options of the table-recognition demo.

    Returns:
        argparse.Namespace carrying ``table_model``, ``table_char_dict_path``,
        ``image``, ``device`` and ``device_id``.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser()

    # Required inputs: model directory, character dictionary and test image.
    cli.add_argument(
        "--table_model",
        required=True,
        help="Path of Table recognition model of PPOCR.")
    cli.add_argument(
        "--table_char_dict_path",
        type=str,
        required=True,
        help="tabel recognition dict path.")
    cli.add_argument(
        "--image", type=str, required=True, help="Path of test image file.")

    # Optional device selection; inference runs on CPU unless told otherwise.
    cli.add_argument(
        "--device",
        type=str,
        default='cpu',
        help="Type of inference device, support 'cpu' or 'gpu'.")
    cli.add_argument(
        "--device_id",
        type=int,
        default=0,
        help="Define which GPU card used to run model.")

    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def build_option(args):
    """Build the runtime option for the table model.

    Args:
        args: parsed command-line namespace; ``device`` and ``device_id``
            are read.

    Returns:
        An ``fd.RuntimeOption`` switched to GPU ``args.device_id`` when
        ``args.device`` equals "gpu" (case-insensitive), otherwise left at
        its defaults.
    """
    option = fd.RuntimeOption()

    wants_gpu = args.device.lower() == "gpu"
    if wants_gpu:
        option.use_gpu(args.device_id)

    return option
|
||||||
|
|
||||||
|
|
||||||
|
args = parse_arguments()

# Both model files are looked up directly inside the --table_model directory.
table_model_file = os.path.join(args.table_model, "inference.pdmodel")
table_params_file = os.path.join(args.table_model, "inference.pdiparams")

# Set the runtime option
table_option = build_option(args)

# Create the table_model
table_model = fd.vision.ocr.StructureV2Table(
    table_model_file,
    table_params_file,
    args.table_char_dict_path,
    runtime_option=table_option)

# Read the image
im = cv2.imread(args.image)
if im is None:
    # cv2.imread signals an unreadable/missing file by returning None instead
    # of raising; fail here with a clear message rather than inside predict().
    raise RuntimeError("Failed to read image file: {}".format(args.image))

# Predict and return the results
result = table_model.predict(im)

print(result)
|
@@ -53,8 +53,10 @@
|
|||||||
#include "fastdeploy/vision/matting/ppmatting/ppmatting.h"
|
#include "fastdeploy/vision/matting/ppmatting/ppmatting.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
|
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
|
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
|
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
|
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
||||||
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
#include "fastdeploy/vision/segmentation/ppseg/model.h"
|
#include "fastdeploy/vision/segmentation/ppseg/model.h"
|
||||||
|
51
fastdeploy/vision/common/result.cc
Executable file → Normal file
51
fastdeploy/vision/common/result.cc
Executable file → Normal file
@@ -649,6 +649,32 @@ std::string OCRResult::Str() {
|
|||||||
}
|
}
|
||||||
out = out + "\n";
|
out = out + "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (table_boxes.size() > 0 && table_structure.size() > 0) {
|
||||||
|
for (int n = 0; n < boxes.size(); n++) {
|
||||||
|
out = out + "table boxes: [";
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," +
|
||||||
|
std::to_string(table_boxes[n][i * 2 + 1]) + "]";
|
||||||
|
|
||||||
|
if (i != 1) {
|
||||||
|
out = out + ",";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out = out + "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
out = out + "\ntable structure: ";
|
||||||
|
for (int m = 0; m < table_structure.size(); m++) {
|
||||||
|
out += table_structure[m];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!table_html.empty()) {
|
||||||
|
out = out + "\n" + "table html: " + table_html;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::vector<std::array<int, 8>> table_boxes;
|
||||||
|
std::vector<std::string> table_structure;
|
||||||
return out;
|
return out;
|
||||||
|
|
||||||
} else if (boxes.size() == 0 && rec_scores.size() > 0 &&
|
} else if (boxes.size() == 0 && rec_scores.size() > 0 &&
|
||||||
@@ -680,6 +706,31 @@ std::string OCRResult::Str() {
|
|||||||
out = out + "\n";
|
out = out + "\n";
|
||||||
}
|
}
|
||||||
return out;
|
return out;
|
||||||
|
} else if (boxes.size() == 0 && table_boxes.size() > 0 &&
|
||||||
|
table_structure.size() > 0) {
|
||||||
|
std::string out;
|
||||||
|
for (int n = 0; n < table_boxes.size(); n++) {
|
||||||
|
out = out + ", table boxes: [";
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," +
|
||||||
|
std::to_string(table_boxes[n][i * 2 + 1]) + "]";
|
||||||
|
|
||||||
|
if (i != 1) {
|
||||||
|
out = out + ",";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out = out + "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
out = out + "\ntable structure: ";
|
||||||
|
for (int m = 0; m < table_structure.size(); m++) {
|
||||||
|
out += table_structure[m];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!table_html.empty()) {
|
||||||
|
out = out + "\n" + "table html: " + table_html;
|
||||||
|
}
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
no_result = no_result + "No Results!";
|
no_result = no_result + "No Results!";
|
||||||
|
@@ -216,6 +216,10 @@ struct FASTDEPLOY_DECL OCRResult : public BaseResult {
|
|||||||
std::vector<float> cls_scores;
|
std::vector<float> cls_scores;
|
||||||
std::vector<int32_t> cls_labels;
|
std::vector<int32_t> cls_labels;
|
||||||
|
|
||||||
|
std::vector<std::array<int, 8>> table_boxes;
|
||||||
|
std::vector<std::string> table_structure;
|
||||||
|
std::string table_html;
|
||||||
|
|
||||||
ResultType type = ResultType::OCR;
|
ResultType type = ResultType::OCR;
|
||||||
|
|
||||||
void Clear();
|
void Clear();
|
||||||
|
@@ -19,11 +19,13 @@ namespace fastdeploy {
|
|||||||
void BindPPOCRModel(pybind11::module& m);
|
void BindPPOCRModel(pybind11::module& m);
|
||||||
void BindPPOCRv3(pybind11::module& m);
|
void BindPPOCRv3(pybind11::module& m);
|
||||||
void BindPPOCRv2(pybind11::module& m);
|
void BindPPOCRv2(pybind11::module& m);
|
||||||
|
void BindPPStructureV2Table(pybind11::module& m);
|
||||||
|
|
||||||
void BindOcr(pybind11::module& m) {
|
void BindOcr(pybind11::module& m) {
|
||||||
auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models");
|
auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models");
|
||||||
BindPPOCRModel(ocr_module);
|
BindPPOCRModel(ocr_module);
|
||||||
BindPPOCRv3(ocr_module);
|
BindPPOCRv3(ocr_module);
|
||||||
BindPPOCRv2(ocr_module);
|
BindPPOCRv2(ocr_module);
|
||||||
|
BindPPStructureV2Table(ocr_module);
|
||||||
}
|
}
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -321,5 +321,94 @@ void BindPPOCRModel(pybind11::module& m) {
|
|||||||
self.BatchPredict(images, &ocr_result);
|
self.BatchPredict(images, &ocr_result);
|
||||||
return ocr_result;
|
return ocr_result;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Table
|
||||||
|
pybind11::class_<vision::ocr::StructureV2TablePreprocessor,
|
||||||
|
vision::ProcessorManager>(m, "StructureV2TablePreprocessor")
|
||||||
|
.def(pybind11::init<>())
|
||||||
|
.def("run", [](vision::ocr::StructureV2TablePreprocessor& self,
|
||||||
|
std::vector<pybind11::array>& im_list) {
|
||||||
|
std::vector<vision::FDMat> images;
|
||||||
|
for (size_t i = 0; i < im_list.size(); ++i) {
|
||||||
|
images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
|
||||||
|
}
|
||||||
|
std::vector<FDTensor> outputs;
|
||||||
|
if (!self.Run(&images, &outputs)) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"Failed to preprocess the input data in "
|
||||||
|
"StructureV2TablePreprocessor.");
|
||||||
|
}
|
||||||
|
|
||||||
|
auto batch_det_img_info = self.GetBatchImgInfo();
|
||||||
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||||
|
outputs[i].StopSharing();
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_pair(outputs, *batch_det_img_info);
|
||||||
|
});
|
||||||
|
|
||||||
|
pybind11::class_<vision::ocr::StructureV2TablePostprocessor>(
|
||||||
|
m, "StructureV2TablePostprocessor")
|
||||||
|
.def(pybind11::init<std::string>())
|
||||||
|
.def("run",
|
||||||
|
[](vision::ocr::StructureV2TablePostprocessor& self,
|
||||||
|
std::vector<FDTensor>& inputs,
|
||||||
|
const std::vector<std::array<int, 4>>& batch_det_img_info) {
|
||||||
|
std::vector<std::vector<std::array<int, 8>>> boxes;
|
||||||
|
std::vector<std::vector<std::string>> structure_list;
|
||||||
|
|
||||||
|
if (!self.Run(inputs, &boxes, &structure_list,
|
||||||
|
batch_det_img_info)) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"Failed to preprocess the input data in "
|
||||||
|
"StructureV2TablePostprocessor.");
|
||||||
|
}
|
||||||
|
return std::make_pair(boxes, structure_list);
|
||||||
|
})
|
||||||
|
.def("run",
|
||||||
|
[](vision::ocr::StructureV2TablePostprocessor& self,
|
||||||
|
std::vector<pybind11::array>& input_array,
|
||||||
|
const std::vector<std::array<int, 4>>& batch_det_img_info) {
|
||||||
|
std::vector<FDTensor> inputs;
|
||||||
|
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
|
||||||
|
std::vector<std::vector<std::array<int, 8>>> boxes;
|
||||||
|
std::vector<std::vector<std::string>> structure_list;
|
||||||
|
|
||||||
|
if (!self.Run(inputs, &boxes, &structure_list,
|
||||||
|
batch_det_img_info)) {
|
||||||
|
throw std::runtime_error(
|
||||||
|
"Failed to preprocess the input data in "
|
||||||
|
"StructureV2TablePostprocessor.");
|
||||||
|
}
|
||||||
|
return std::make_pair(boxes, structure_list);
|
||||||
|
});
|
||||||
|
|
||||||
|
pybind11::class_<vision::ocr::StructureV2Table, FastDeployModel>(
|
||||||
|
m, "StructureV2Table")
|
||||||
|
.def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
|
||||||
|
ModelFormat>())
|
||||||
|
.def(pybind11::init<>())
|
||||||
|
.def_property_readonly("preprocessor",
|
||||||
|
&vision::ocr::StructureV2Table::GetPreprocessor)
|
||||||
|
.def_property_readonly("postprocessor",
|
||||||
|
&vision::ocr::StructureV2Table::GetPostprocessor)
|
||||||
|
.def("predict",
|
||||||
|
[](vision::ocr::StructureV2Table& self, pybind11::array& data) {
|
||||||
|
auto mat = PyArrayToCvMat(data);
|
||||||
|
vision::OCRResult ocr_result;
|
||||||
|
self.Predict(mat, &ocr_result);
|
||||||
|
return ocr_result;
|
||||||
|
})
|
||||||
|
.def("batch_predict", [](vision::ocr::StructureV2Table& self,
|
||||||
|
std::vector<pybind11::array>& data) {
|
||||||
|
std::vector<cv::Mat> images;
|
||||||
|
for (size_t i = 0; i < data.size(); ++i) {
|
||||||
|
images.push_back(PyArrayToCvMat(data[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<vision::OCRResult> ocr_results;
|
||||||
|
self.BatchPredict(images, &ocr_results);
|
||||||
|
return ocr_results;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
108
fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc
Executable file → Normal file
108
fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc
Executable file → Normal file
@@ -12,64 +12,96 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
#include <pybind11/stl.h>
|
#include <pybind11/stl.h>
|
||||||
|
|
||||||
#include "fastdeploy/pybind/main.h"
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
void BindPPOCRv3(pybind11::module& m) {
|
void BindPPOCRv3(pybind11::module& m) {
|
||||||
// PPOCRv3
|
// PPOCRv3
|
||||||
pybind11::class_<pipeline::PPOCRv3, FastDeployModel>(
|
pybind11::class_<pipeline::PPOCRv3, FastDeployModel>(m, "PPOCRv3")
|
||||||
m, "PPOCRv3")
|
|
||||||
|
|
||||||
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
||||||
fastdeploy::vision::ocr::Classifier*,
|
fastdeploy::vision::ocr::Classifier*,
|
||||||
fastdeploy::vision::ocr::Recognizer*>())
|
fastdeploy::vision::ocr::Recognizer*>())
|
||||||
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
||||||
fastdeploy::vision::ocr::Recognizer*>())
|
fastdeploy::vision::ocr::Recognizer*>())
|
||||||
.def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize, &pipeline::PPOCRv3::SetClsBatchSize)
|
.def_property("cls_batch_size", &pipeline::PPOCRv3::GetClsBatchSize,
|
||||||
.def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize, &pipeline::PPOCRv3::SetRecBatchSize)
|
&pipeline::PPOCRv3::SetClsBatchSize)
|
||||||
.def("clone", [](pipeline::PPOCRv3& self) {
|
.def_property("rec_batch_size", &pipeline::PPOCRv3::GetRecBatchSize,
|
||||||
return self.Clone();
|
&pipeline::PPOCRv3::SetRecBatchSize)
|
||||||
})
|
.def("clone", [](pipeline::PPOCRv3& self) { return self.Clone(); })
|
||||||
.def("predict", [](pipeline::PPOCRv3& self,
|
.def("predict",
|
||||||
pybind11::array& data) {
|
[](pipeline::PPOCRv3& self, pybind11::array& data) {
|
||||||
auto mat = PyArrayToCvMat(data);
|
auto mat = PyArrayToCvMat(data);
|
||||||
vision::OCRResult res;
|
vision::OCRResult res;
|
||||||
self.Predict(&mat, &res);
|
self.Predict(&mat, &res);
|
||||||
return res;
|
return res;
|
||||||
})
|
})
|
||||||
.def("batch_predict", [](pipeline::PPOCRv3& self, std::vector<pybind11::array>& data) {
|
.def("batch_predict",
|
||||||
std::vector<cv::Mat> images;
|
[](pipeline::PPOCRv3& self, std::vector<pybind11::array>& data) {
|
||||||
for (size_t i = 0; i < data.size(); ++i) {
|
std::vector<cv::Mat> images;
|
||||||
images.push_back(PyArrayToCvMat(data[i]));
|
for (size_t i = 0; i < data.size(); ++i) {
|
||||||
}
|
images.push_back(PyArrayToCvMat(data[i]));
|
||||||
std::vector<vision::OCRResult> results;
|
}
|
||||||
self.BatchPredict(images, &results);
|
std::vector<vision::OCRResult> results;
|
||||||
return results;
|
self.BatchPredict(images, &results);
|
||||||
});
|
return results;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void BindPPOCRv2(pybind11::module& m) {
|
void BindPPOCRv2(pybind11::module& m) {
|
||||||
// PPOCRv2
|
// PPOCRv2
|
||||||
pybind11::class_<pipeline::PPOCRv2, FastDeployModel>(
|
pybind11::class_<pipeline::PPOCRv2, FastDeployModel>(m, "PPOCRv2")
|
||||||
m, "PPOCRv2")
|
|
||||||
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
||||||
fastdeploy::vision::ocr::Classifier*,
|
fastdeploy::vision::ocr::Classifier*,
|
||||||
fastdeploy::vision::ocr::Recognizer*>())
|
fastdeploy::vision::ocr::Recognizer*>())
|
||||||
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
||||||
fastdeploy::vision::ocr::Recognizer*>())
|
fastdeploy::vision::ocr::Recognizer*>())
|
||||||
.def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize, &pipeline::PPOCRv2::SetClsBatchSize)
|
.def_property("cls_batch_size", &pipeline::PPOCRv2::GetClsBatchSize,
|
||||||
.def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize, &pipeline::PPOCRv2::SetRecBatchSize)
|
&pipeline::PPOCRv2::SetClsBatchSize)
|
||||||
.def("clone", [](pipeline::PPOCRv2& self) {
|
.def_property("rec_batch_size", &pipeline::PPOCRv2::GetRecBatchSize,
|
||||||
return self.Clone();
|
&pipeline::PPOCRv2::SetRecBatchSize)
|
||||||
})
|
.def("clone", [](pipeline::PPOCRv2& self) { return self.Clone(); })
|
||||||
.def("predict", [](pipeline::PPOCRv2& self,
|
.def("predict",
|
||||||
pybind11::array& data) {
|
[](pipeline::PPOCRv2& self, pybind11::array& data) {
|
||||||
auto mat = PyArrayToCvMat(data);
|
auto mat = PyArrayToCvMat(data);
|
||||||
vision::OCRResult res;
|
vision::OCRResult res;
|
||||||
self.Predict(&mat, &res);
|
self.Predict(&mat, &res);
|
||||||
return res;
|
return res;
|
||||||
})
|
})
|
||||||
.def("batch_predict", [](pipeline::PPOCRv2& self, std::vector<pybind11::array>& data) {
|
.def("batch_predict",
|
||||||
|
[](pipeline::PPOCRv2& self, std::vector<pybind11::array>& data) {
|
||||||
|
std::vector<cv::Mat> images;
|
||||||
|
for (size_t i = 0; i < data.size(); ++i) {
|
||||||
|
images.push_back(PyArrayToCvMat(data[i]));
|
||||||
|
}
|
||||||
|
std::vector<vision::OCRResult> results;
|
||||||
|
self.BatchPredict(images, &results);
|
||||||
|
return results;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void BindPPStructureV2Table(pybind11::module& m) {
|
||||||
|
// PPStructureV2Table
|
||||||
|
pybind11::class_<pipeline::PPStructureV2Table, FastDeployModel>(
|
||||||
|
m, "PPStructureV2Table")
|
||||||
|
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
|
||||||
|
fastdeploy::vision::ocr::Recognizer*,
|
||||||
|
fastdeploy::vision::ocr::StructureV2Table*>())
|
||||||
|
.def_property("rec_batch_size",
|
||||||
|
&pipeline::PPStructureV2Table::GetRecBatchSize,
|
||||||
|
&pipeline::PPStructureV2Table::SetRecBatchSize)
|
||||||
|
.def("clone",
|
||||||
|
[](pipeline::PPStructureV2Table& self) { return self.Clone(); })
|
||||||
|
.def("predict",
|
||||||
|
[](pipeline::PPStructureV2Table& self, pybind11::array& data) {
|
||||||
|
auto mat = PyArrayToCvMat(data);
|
||||||
|
vision::OCRResult res;
|
||||||
|
self.Predict(&mat, &res);
|
||||||
|
return res;
|
||||||
|
})
|
||||||
|
.def("batch_predict", [](pipeline::PPStructureV2Table& self,
|
||||||
|
std::vector<pybind11::array>& data) {
|
||||||
std::vector<cv::Mat> images;
|
std::vector<cv::Mat> images;
|
||||||
for (size_t i = 0; i < data.size(); ++i) {
|
for (size_t i = 0; i < data.size(); ++i) {
|
||||||
images.push_back(PyArrayToCvMat(data[i]));
|
images.push_back(PyArrayToCvMat(data[i]));
|
||||||
|
233
fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc
Normal file
233
fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/utils/perf.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace pipeline {
|
||||||
|
// Wires the pipeline to externally created detection, recognition and
// table-structure models. Only the raw pointers are stored here.
PPStructureV2Table::PPStructureV2Table(
    vision::ocr::DBDetector* det_model, vision::ocr::Recognizer* rec_model,
    vision::ocr::StructureV2Table* table_model)
    : detector_(det_model), recognizer_(rec_model), table_(table_model) {
  // NOTE(review): the result of this check is discarded, so construction
  // never fails here; callers have to query Initialized() themselves.
  Initialized();
}
|
||||||
|
|
||||||
|
bool PPStructureV2Table::SetRecBatchSize(int rec_batch_size) {
|
||||||
|
if (rec_batch_size < -1 || rec_batch_size == 0) {
|
||||||
|
FDERROR << "batch_size > 0 or batch_size == -1." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
rec_batch_size_ = rec_batch_size;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the recognition batch size currently stored in the pipeline.
int PPStructureV2Table::GetRecBatchSize() {
  return rec_batch_size_;
}
|
||||||
|
|
||||||
|
bool PPStructureV2Table::Initialized() const {
|
||||||
|
if (detector_ != nullptr && !detector_->Initialized()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (recognizer_ != nullptr && !recognizer_->Initialized()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (table_ != nullptr && !table_->Initialized()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<PPStructureV2Table> PPStructureV2Table::Clone() const {
|
||||||
|
std::unique_ptr<PPStructureV2Table> clone_model =
|
||||||
|
utils::make_unique<PPStructureV2Table>(PPStructureV2Table(*this));
|
||||||
|
clone_model->detector_ = detector_->Clone().release();
|
||||||
|
clone_model->recognizer_ = recognizer_->Clone().release();
|
||||||
|
clone_model->table_ = table_->Clone().release();
|
||||||
|
return clone_model;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointer-based convenience overload; forwards to the const-reference
// overload.
// \param[in] img Input image; a null pointer is rejected with an error log.
// \param[out] result Receives the prediction for the image.
// \return true on success, false if img is null or prediction failed.
bool PPStructureV2Table::Predict(cv::Mat* img,
                                 fastdeploy::vision::OCRResult* result) {
  if (img == nullptr) {
    FDERROR << "The input image pointer is null." << std::endl;
    return false;
  }
  return Predict(*img, result);
}
|
||||||
|
|
||||||
|
// Runs the pipeline on a single image by delegating to BatchPredict with a
// one-element batch.
// \param[in] img Input image.
// \param[out] result Receives the (moved) result of the single batch entry;
//             left untouched when prediction fails.
// \return true on success, false when BatchPredict reports a failure.
bool PPStructureV2Table::Predict(const cv::Mat& img,
                                 fastdeploy::vision::OCRResult* result) {
  std::vector<fastdeploy::vision::OCRResult> single_batch(1);
  if (!BatchPredict({img}, &single_batch)) {
    return false;
  }
  *result = std::move(single_batch[0]);
  return true;
}
|
||||||
|
|
||||||
|
bool PPStructureV2Table::BatchPredict(
|
||||||
|
const std::vector<cv::Mat>& images,
|
||||||
|
std::vector<fastdeploy::vision::OCRResult>* batch_result) {
|
||||||
|
batch_result->clear();
|
||||||
|
batch_result->resize(images.size());
|
||||||
|
std::vector<std::vector<std::array<int, 8>>> batch_boxes(images.size());
|
||||||
|
|
||||||
|
if (!detector_->BatchPredict(images, &batch_boxes)) {
|
||||||
|
FDERROR << "There's error while detecting image in PPOCR." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) {
|
||||||
|
vision::ocr::SortBoxes(&(batch_boxes[i_batch]));
|
||||||
|
(*batch_result)[i_batch].boxes = batch_boxes[i_batch];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i_batch = 0; i_batch < images.size(); ++i_batch) {
|
||||||
|
fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch];
|
||||||
|
// Get croped images by detection result
|
||||||
|
const std::vector<std::array<int, 8>>& boxes = ocr_result.boxes;
|
||||||
|
const cv::Mat& img = images[i_batch];
|
||||||
|
std::vector<cv::Mat> image_list;
|
||||||
|
if (boxes.size() == 0) {
|
||||||
|
image_list.emplace_back(img);
|
||||||
|
} else {
|
||||||
|
image_list.resize(boxes.size());
|
||||||
|
for (size_t i_box = 0; i_box < boxes.size(); ++i_box) {
|
||||||
|
image_list[i_box] = vision::ocr::GetRotateCropImage(img, boxes[i_box]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::vector<int32_t>* cls_labels_ptr = &ocr_result.cls_labels;
|
||||||
|
std::vector<float>* cls_scores_ptr = &ocr_result.cls_scores;
|
||||||
|
|
||||||
|
std::vector<std::string>* text_ptr = &ocr_result.text;
|
||||||
|
std::vector<float>* rec_scores_ptr = &ocr_result.rec_scores;
|
||||||
|
|
||||||
|
std::vector<float> width_list;
|
||||||
|
for (int i = 0; i < image_list.size(); i++) {
|
||||||
|
width_list.push_back(float(image_list[i].cols) / image_list[i].rows);
|
||||||
|
}
|
||||||
|
std::vector<int> indices = vision::ocr::ArgSort(width_list);
|
||||||
|
|
||||||
|
for (size_t start_index = 0; start_index < image_list.size();
|
||||||
|
start_index += rec_batch_size_) {
|
||||||
|
size_t end_index =
|
||||||
|
std::min(start_index + rec_batch_size_, image_list.size());
|
||||||
|
if (!recognizer_->BatchPredict(image_list, text_ptr, rec_scores_ptr,
|
||||||
|
start_index, end_index, indices)) {
|
||||||
|
FDERROR << "There's error while recognizing image in PPOCR."
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!table_->BatchPredict(images, batch_result)) {
|
||||||
|
FDERROR << "There's error while recognizing tables in images." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i_batch = 0; i_batch < batch_boxes.size(); ++i_batch) {
|
||||||
|
fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch];
|
||||||
|
std::vector<std::vector<std::string>> matched(ocr_result.table_boxes.size(),
|
||||||
|
std::vector<std::string>());
|
||||||
|
|
||||||
|
std::vector<int> ocr_box;
|
||||||
|
std::vector<int> structure_box;
|
||||||
|
for (int i = 0; i < ocr_result.boxes.size(); i++) {
|
||||||
|
ocr_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.boxes[i]);
|
||||||
|
ocr_box[0] -= 1;
|
||||||
|
ocr_box[1] -= 1;
|
||||||
|
ocr_box[2] += 1;
|
||||||
|
ocr_box[3] += 1;
|
||||||
|
|
||||||
|
std::vector<std::vector<float>> dis_list(ocr_result.table_boxes.size(),
|
||||||
|
std::vector<float>(3, 100000.0));
|
||||||
|
|
||||||
|
for (int j = 0; j < ocr_result.table_boxes.size(); j++) {
|
||||||
|
structure_box = vision::ocr::Xyxyxyxy2Xyxy(ocr_result.table_boxes[j]);
|
||||||
|
dis_list[j][0] = vision::ocr::Dis(ocr_box, structure_box);
|
||||||
|
dis_list[j][1] = 1 - vision::ocr::Iou(ocr_box, structure_box);
|
||||||
|
dis_list[j][2] = j;
|
||||||
|
}
|
||||||
|
|
||||||
|
// find min dis idx
|
||||||
|
std::sort(dis_list.begin(), dis_list.end(), vision::ocr::ComparisonDis);
|
||||||
|
matched[dis_list[0][2]].push_back(ocr_result.text[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// get pred html
|
||||||
|
std::string html_str = "";
|
||||||
|
int td_tag_idx = 0;
|
||||||
|
auto structure_html_tags = ocr_result.table_structure;
|
||||||
|
for (int i = 0; i < structure_html_tags.size(); i++) {
|
||||||
|
if (structure_html_tags[i].find("</td>") != std::string::npos) {
|
||||||
|
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
|
||||||
|
html_str += "<td>";
|
||||||
|
}
|
||||||
|
if (matched[td_tag_idx].size() > 0) {
|
||||||
|
bool b_with = false;
|
||||||
|
if (matched[td_tag_idx][0].find("<b>") != std::string::npos &&
|
||||||
|
matched[td_tag_idx].size() > 1) {
|
||||||
|
b_with = true;
|
||||||
|
html_str += "<b>";
|
||||||
|
}
|
||||||
|
for (int j = 0; j < matched[td_tag_idx].size(); j++) {
|
||||||
|
std::string content = matched[td_tag_idx][j];
|
||||||
|
if (matched[td_tag_idx].size() > 1) {
|
||||||
|
// remove blank, <b> and </b>
|
||||||
|
if (content.length() > 0 && content.at(0) == ' ') {
|
||||||
|
content = content.substr(0);
|
||||||
|
}
|
||||||
|
if (content.length() > 2 && content.substr(0, 3) == "<b>") {
|
||||||
|
content = content.substr(3);
|
||||||
|
}
|
||||||
|
if (content.length() > 4 &&
|
||||||
|
content.substr(content.length() - 4) == "</b>") {
|
||||||
|
content = content.substr(0, content.length() - 4);
|
||||||
|
}
|
||||||
|
if (content.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// add blank
|
||||||
|
if (j != matched[td_tag_idx].size() - 1 &&
|
||||||
|
content.at(content.length() - 1) != ' ') {
|
||||||
|
content += ' ';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
html_str += content;
|
||||||
|
}
|
||||||
|
if (b_with) {
|
||||||
|
html_str += "</b>";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (structure_html_tags[i].find("<td></td>") != std::string::npos) {
|
||||||
|
html_str += "</td>";
|
||||||
|
} else {
|
||||||
|
html_str += structure_html_tags[i];
|
||||||
|
}
|
||||||
|
td_tag_idx += 1;
|
||||||
|
} else {
|
||||||
|
html_str += structure_html_tags[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(*batch_result)[i_batch].table_html = html_str;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace pipeline
|
||||||
|
} // namespace fastdeploy
|
93
fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h
Executable file
93
fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h
Executable file
@@ -0,0 +1,93 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "fastdeploy/fastdeploy_model.h"
|
||||||
|
#include "fastdeploy/vision/common/processors/transform.h"
|
||||||
|
#include "fastdeploy/vision/common/result.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
|
||||||
|
#include "fastdeploy/utils/unique_ptr.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
/** \brief This pipeline can launch detection model, classification model and recognition model sequentially. All OCR pipeline APIs are defined inside this namespace.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
namespace pipeline {
|
||||||
|
/*! @brief PPStructureV2Table chains a text detection model, a text recognition model and a table structure model (e.g. en_ppstructure_mobile_v2.0_SLANet) into one table recognition pipeline.
 */
class FASTDEPLOY_DECL PPStructureV2Table : public FastDeployModel {
 public:
  /** \brief Set up the pipeline from already created detection, recognition and table models.
   *
   * Only the raw pointers are stored; this class declares no destructor, so it does not delete the models.
   * NOTE(review): the models presumably have to outlive the pipeline - confirm with callers.
   *
   * \param[in] det_model Pointer to the detection model, e.g. one loaded from ./ch_PP-OCRv2_det_infer
   * \param[in] rec_model Pointer to the recognition model, e.g. one loaded from ./ch_PP-OCRv2_rec_infer
   * \param[in] table_model Pointer to the table recognition model, e.g. one loaded from ./en_ppstructure_mobile_v2.0_SLANet_infer
   */
  PPStructureV2Table(fastdeploy::vision::ocr::DBDetector* det_model,
                     fastdeploy::vision::ocr::Recognizer* rec_model,
                     fastdeploy::vision::ocr::StructureV2Table* table_model);


  /** \brief Clone a new PPStructureV2Table with less memory usage when multiple instances of the same model are created
   *
   * \return new PPStructureV2Table* type unique pointer
   */
  std::unique_ptr<PPStructureV2Table> Clone() const;

  /** \brief Predict the input image and get OCR result.
   *
   * \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
   * \param[out] result The output OCR result will be written to this structure.
   * \return true if the prediction succeeded, otherwise false.
   */
  virtual bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
  virtual bool Predict(const cv::Mat& img,
                       fastdeploy::vision::OCRResult* result);
  /** \brief BatchPredict the input image and get OCR result.
   *
   * \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
   * \param[out] batch_result The output list of OCR result will be written to this structure.
   * \return true if the prediction succeeded, otherwise false.
   */
  virtual bool BatchPredict(const std::vector<cv::Mat>& images,
                            std::vector<fastdeploy::vision::OCRResult>* batch_result);

  /// Check whether the attached models are initialized.
  bool Initialized() const override;
  /// Set the batch size used for text recognition; returns false if the value is rejected.
  bool SetRecBatchSize(int rec_batch_size);
  /// Get the batch size used for text recognition.
  int GetRecBatchSize();

 protected:
  // Raw pointers to the three stage models handed in through the constructor.
  fastdeploy::vision::ocr::DBDetector* detector_ = nullptr;
  fastdeploy::vision::ocr::Recognizer* recognizer_ = nullptr;
  fastdeploy::vision::ocr::StructureV2Table* table_ = nullptr;

 private:
  // Batch size used for text recognition; defaults to 6.
  int rec_batch_size_ = 6;
};
|
||||||
|
|
||||||
|
namespace application {
|
||||||
|
namespace ocrsystem {
|
||||||
|
typedef pipeline::PPStructureV2Table PPStructureV2TableSystem;
|
||||||
|
} // namespace ocrsystem
|
||||||
|
} // namespace application
|
||||||
|
|
||||||
|
} // namespace pipeline
|
||||||
|
} // namespace fastdeploy
|
133
fastdeploy/vision/ocr/ppocr/structurev2_table.cc
Normal file
133
fastdeploy/vision/ocr/ppocr/structurev2_table.cc
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/utils/perf.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
namespace ocr {
|
||||||
|
|
||||||
|
/// Default constructor: performs no runtime initialization.
StructureV2Table::StructureV2Table() {}

/// Builds the table model: hands the dictionary path to the postprocessor,
/// selects the valid backends for the given model format, copies the runtime
/// option and initializes the runtime.
StructureV2Table::StructureV2Table(const std::string& model_file,
                                   const std::string& params_file,
                                   const std::string& table_char_dict_path,
                                   const RuntimeOption& custom_option,
                                   const ModelFormat& model_format)
    : postprocessor_(table_char_dict_path) {
  if (model_format != ModelFormat::ONNX) {
    // Non-ONNX models: full backend list, including the NPU/TPU targets.
    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO,
                          Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
    valid_kunlunxin_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    valid_sophgonpu_backends = {Backend::SOPHGOTPU};
    valid_rknpu_backends = {Backend::RKNPU2};
  } else {
    // ONNX models: CPU and GPU backends only.
    valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
  }

  // Start from the caller's option, then override the model description.
  runtime_option = custom_option;
  runtime_option.model_file = model_file;
  runtime_option.params_file = params_file;
  runtime_option.model_format = model_format;

  initialized = Initialize();
}
|
||||||
|
|
||||||
|
// Init
|
||||||
|
bool StructureV2Table::Initialize() {
|
||||||
|
if (!InitRuntime()) {
|
||||||
|
FDERROR << "Failed to initialize fastdeploy backend." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<StructureV2Table> StructureV2Table::Clone() const {
|
||||||
|
std::unique_ptr<StructureV2Table> clone_model =
|
||||||
|
utils::make_unique<StructureV2Table>(StructureV2Table(*this));
|
||||||
|
clone_model->SetRuntime(clone_model->CloneRuntime());
|
||||||
|
return clone_model;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Single-image prediction: runs BatchPredict on a one-element batch and moves
// element 0 of each batch result into the caller's outputs.
bool StructureV2Table::Predict(const cv::Mat& img,
                               std::vector<std::array<int, 8>>* boxes_result,
                               std::vector<std::string>* structure_result) {
  std::vector<std::vector<std::array<int, 8>>> batch_boxes;
  std::vector<std::vector<std::string>> batch_structures;

  const bool ok = BatchPredict({img}, &batch_boxes, &batch_structures);
  if (ok) {
    *boxes_result = std::move(batch_boxes[0]);
    *structure_result = std::move(batch_structures[0]);
  }
  return ok;
}
|
||||||
|
|
||||||
|
// Single-image prediction that writes into the table_boxes and
// table_structure fields of an OCRResult.
bool StructureV2Table::Predict(const cv::Mat& img,
                               vision::OCRResult* ocr_result) {
  return Predict(img, &(ocr_result->table_boxes),
                 &(ocr_result->table_structure));
}
|
||||||
|
|
||||||
|
bool StructureV2Table::BatchPredict(
|
||||||
|
const std::vector<cv::Mat>& images,
|
||||||
|
std::vector<vision::OCRResult>* ocr_results) {
|
||||||
|
std::vector<std::vector<std::array<int, 8>>> det_results;
|
||||||
|
std::vector<std::vector<std::string>> structure_results;
|
||||||
|
if (!BatchPredict(images, &det_results, &structure_results)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
ocr_results->resize(det_results.size());
|
||||||
|
for (int i = 0; i < det_results.size(); i++) {
|
||||||
|
(*ocr_results)[i].table_boxes = std::move(det_results[i]);
|
||||||
|
(*ocr_results)[i].table_structure = std::move(structure_results[i]);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool StructureV2Table::BatchPredict(
|
||||||
|
const std::vector<cv::Mat>& images,
|
||||||
|
std::vector<std::vector<std::array<int, 8>>>* det_results,
|
||||||
|
std::vector<std::vector<std::string>>* structure_results) {
|
||||||
|
std::vector<FDMat> fd_images = WrapMat(images);
|
||||||
|
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
|
||||||
|
FDERROR << "Failed to preprocess input image." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
auto batch_det_img_info = preprocessor_.GetBatchImgInfo();
|
||||||
|
|
||||||
|
reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
|
||||||
|
if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
|
||||||
|
FDERROR << "Failed to inference by runtime." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!postprocessor_.Run(reused_output_tensors_, det_results,
|
||||||
|
structure_results, *batch_det_img_info)) {
|
||||||
|
FDERROR << "Failed to postprocess the inference cls_results by runtime."
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
113
fastdeploy/vision/ocr/ppocr/structurev2_table.h
Executable file
113
fastdeploy/vision/ocr/ppocr/structurev2_table.h
Executable file
@@ -0,0 +1,113 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "fastdeploy/fastdeploy_model.h"
|
||||||
|
#include "fastdeploy/vision/common/processors/transform.h"
|
||||||
|
#include "fastdeploy/vision/common/result.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h"
|
||||||
|
#include "fastdeploy/utils/unique_ptr.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
/** \brief All OCR series model APIs are defined inside this namespace
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
namespace ocr {
|
||||||
|
|
||||||
|
/*! @brief StructureV2Table object is used to load the table structure recognition model (e.g. en_ppstructure_mobile_v2.0_SLANet) provided by PaddleOCR.
|
||||||
|
*/
|
||||||
|
class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel {
|
||||||
|
public:
|
||||||
|
StructureV2Table();
|
||||||
|
/** \brief Set path of model file, and the configuration of runtime
|
||||||
|
*
|
||||||
|
* \param[in] model_file Path of model file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdmodel.
|
||||||
|
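* \param[in] params_file Path of parameter file, e.g ./en_ppstructure_mobile_v2.0_SLANet_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
* \param[in] table_char_dict_path Path of the table structure dictionary file; it is passed to StructureV2TablePostprocessor, which reads one token per line.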
|
||||||
|
* \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
|
||||||
|
* \param[in] model_format Model format of the loaded model, default is Paddle format.
|
||||||
|
*/
|
||||||
|
StructureV2Table(const std::string& model_file,
|
||||||
|
const std::string& params_file = "",
|
||||||
|
const std::string& table_char_dict_path = "",
|
||||||
|
const RuntimeOption& custom_option = RuntimeOption(),
|
||||||
|
const ModelFormat& model_format = ModelFormat::PADDLE);
|
||||||
|
|
||||||
|
/** \brief Clone a new StructureV2Table Recognizer with less memory usage when multiple instances of the same model are created
|
||||||
|
*
|
||||||
|
* \return new StructureV2Table* type unique pointer
|
||||||
|
*/
|
||||||
|
virtual std::unique_ptr<StructureV2Table> Clone() const;
|
||||||
|
|
||||||
|
/// Get model's name
|
||||||
|
std::string ModelName() const { return "ppocr/ocr_table"; }
|
||||||
|
|
||||||
|
/** \brief Predict the input image and get OCR detection model result.
|
||||||
|
*
|
||||||
|
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||||
|
* \param[out] boxes_result The predicted table cell boxes will be written to this structure.
* \param[out] structure_result The predicted table structure tokens will be written to this structure.
|
||||||
|
* \return true if the prediction is successed, otherwise false.
|
||||||
|
*/
|
||||||
|
virtual bool Predict(const cv::Mat& img,
|
||||||
|
std::vector<std::array<int, 8>>* boxes_result,
|
||||||
|
std::vector<std::string>* structure_result);
|
||||||
|
|
||||||
|
/** \brief Predict the input image and get OCR detection model result.
|
||||||
|
*
|
||||||
|
* \param[in] img The input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||||
|
* \param[in] ocr_result The output of OCR detection model result will be writen to this structure.
|
||||||
|
* \return true if the prediction is successed, otherwise false.
|
||||||
|
*/
|
||||||
|
virtual bool Predict(const cv::Mat& img, vision::OCRResult* ocr_result);
|
||||||
|
|
||||||
|
/** \brief BatchPredict the input image and get OCR detection model result.
|
||||||
|
*
|
||||||
|
* \param[in] images The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||||
|
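* \param[out] det_results The predicted table cell boxes of each image will be written to this structure.
* \param[out] structure_results The predicted table structure tokens of each image will be written to this structure.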
|
||||||
|
* \return true if the prediction is successed, otherwise false.
|
||||||
|
*/
|
||||||
|
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
|
||||||
|
std::vector<std::vector<std::array<int, 8>>>* det_results,
|
||||||
|
std::vector<std::vector<std::string>>* structure_results);
|
||||||
|
|
||||||
|
/** \brief BatchPredict the input image and get OCR detection model result.
|
||||||
|
*
|
||||||
|
* \param[in] images The list input of image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||||
|
* \param[in] ocr_results The output of OCR detection model result will be writen to this structure.
|
||||||
|
* \return true if the prediction is successed, otherwise false.
|
||||||
|
*/
|
||||||
|
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
|
||||||
|
std::vector<vision::OCRResult>* ocr_results);
|
||||||
|
|
||||||
|
/// Get preprocessor reference of StructureV2TablePreprocessor
|
||||||
|
virtual StructureV2TablePreprocessor& GetPreprocessor() {
|
||||||
|
return preprocessor_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get postprocessor reference of StructureV2TablePostprocessor
|
||||||
|
virtual StructureV2TablePostprocessor& GetPostprocessor() {
|
||||||
|
return postprocessor_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool Initialize();
|
||||||
|
StructureV2TablePreprocessor preprocessor_;
|
||||||
|
StructureV2TablePostprocessor postprocessor_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
170
fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc
Normal file
170
fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.cc
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_postprocessor.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/utils/perf.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
namespace ocr {
|
||||||
|
|
||||||
|
// Default constructor: no dictionary is loaded, so the object is marked as
// not initialized. The index members are set to -1 so they never hold
// indeterminate values.
StructureV2TablePostprocessor::StructureV2TablePostprocessor() {
  ignore_beg_token_idx = -1;
  ignore_end_token_idx = -1;
  dict_end_idx = -1;
  initialized_ = false;
}
|
||||||
|
|
||||||
|
// Loads the table structure dictionary from `dict_path` (one token per line)
// and builds the token list and token -> index map used for decoding.
//
// Token layout: "sos", the file's tokens (with "<td>" removed and "<td></td>"
// ensured when merge_no_span_structure is set), then "eos" as the last entry.
// Asserts if the file cannot be opened.
StructureV2TablePostprocessor::StructureV2TablePostprocessor(
    const std::string& dict_path) {
  std::ifstream in(dict_path);
  FDASSERT(in, "Cannot open file %s to read.", dict_path.c_str());

  dict_character.clear();
  dict_character.push_back("sos");  // add special character
  std::string line;
  while (std::getline(in, line)) {
    dict_character.push_back(line);
  }

  if (merge_no_span_structure) {
    if (std::find(dict_character.begin(), dict_character.end(), "<td></td>") ==
        dict_character.end()) {
      dict_character.push_back("<td></td>");
    }
    // Drop every bare "<td>" token (erase-remove idiom).
    dict_character.erase(
        std::remove(dict_character.begin(), dict_character.end(), "<td>"),
        dict_character.end());
  }

  dict_character.push_back("eos");  // add special character

  // -1 means "token not present"; without this the members stay
  // uninitialized whenever the dictionary has no "beg"/"end" entry.
  ignore_beg_token_idx = -1;
  ignore_end_token_idx = -1;

  dict.clear();
  for (size_t i = 0; i < dict_character.size(); ++i) {
    const int token_idx = static_cast<int>(i);
    dict[dict_character[i]] = token_idx;
    if (dict_character[i] == "beg") {
      ignore_beg_token_idx = token_idx;
    } else if (dict_character[i] == "end") {
      ignore_end_token_idx = token_idx;
    }
  }
  // "eos" was appended last, so its index is size - 1.
  dict_end_idx = static_cast<int>(dict_character.size()) - 1;

  initialized_ = true;
}
|
||||||
|
|
||||||
|
bool StructureV2TablePostprocessor::SingleBatchPostprocessor(
|
||||||
|
const float* structure_probs, const float* bbox_preds, size_t slice_dim,
|
||||||
|
size_t prob_dim, size_t box_dim, int img_width, int img_height,
|
||||||
|
std::vector<std::array<int, 8>>* boxes_result,
|
||||||
|
std::vector<std::string>* structure_list_result) {
|
||||||
|
structure_list_result->push_back("<html>");
|
||||||
|
structure_list_result->push_back("<body>");
|
||||||
|
structure_list_result->push_back("<table>");
|
||||||
|
|
||||||
|
for (int i = 0; i < slice_dim; i++) {
|
||||||
|
int structure_idx = 0;
|
||||||
|
float structure_prob = structure_probs[i * prob_dim];
|
||||||
|
for (int j = 0; j < prob_dim; j++) {
|
||||||
|
if (structure_probs[i * prob_dim + j] > structure_prob) {
|
||||||
|
structure_prob = structure_probs[i * prob_dim + j];
|
||||||
|
structure_idx = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (structure_idx > 0 && structure_idx == dict_end_idx) break;
|
||||||
|
|
||||||
|
if (structure_idx == ignore_end_token_idx ||
|
||||||
|
structure_idx == ignore_beg_token_idx)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
std::string text = dict_character[structure_idx];
|
||||||
|
if (std::find(td_tokens.begin(), td_tokens.end(), text) !=
|
||||||
|
td_tokens.end()) {
|
||||||
|
std::array<int, 8> bbox;
|
||||||
|
// box dim: en->4, ch->8
|
||||||
|
if (box_dim == 4) {
|
||||||
|
bbox[0] = bbox_preds[i * box_dim] * img_width;
|
||||||
|
bbox[1] = bbox_preds[i * box_dim + 1] * img_height;
|
||||||
|
|
||||||
|
bbox[2] = bbox_preds[i * box_dim + 2] * img_width;
|
||||||
|
bbox[3] = bbox_preds[i * box_dim + 1] * img_height;
|
||||||
|
|
||||||
|
bbox[4] = bbox_preds[i * box_dim + 2] * img_width;
|
||||||
|
bbox[5] = bbox_preds[i * box_dim + 3] * img_height;
|
||||||
|
|
||||||
|
bbox[6] = bbox_preds[i * box_dim] * img_width;
|
||||||
|
bbox[7] = bbox_preds[i * box_dim + 3] * img_height;
|
||||||
|
} else {
|
||||||
|
for (int k = 0; k < 8; k++) {
|
||||||
|
float bbox_pred = bbox_preds[i * box_dim + k];
|
||||||
|
bbox[k] =
|
||||||
|
int(k % 2 == 0 ? bbox_pred * img_width : bbox_pred * img_height);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boxes_result->push_back(bbox);
|
||||||
|
}
|
||||||
|
structure_list_result->push_back(text);
|
||||||
|
}
|
||||||
|
structure_list_result->push_back("</table>");
|
||||||
|
structure_list_result->push_back("</body>");
|
||||||
|
structure_list_result->push_back("</html>");
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool StructureV2TablePostprocessor::Run(
|
||||||
|
const std::vector<FDTensor>& tensors,
|
||||||
|
std::vector<std::vector<std::array<int, 8>>>* bbox_batch_list,
|
||||||
|
std::vector<std::vector<std::string>>* structure_batch_list,
|
||||||
|
const std::vector<std::array<int, 4>>& batch_det_img_info) {
|
||||||
|
// Table have 2 output tensors.
|
||||||
|
const FDTensor& structure_probs = tensors[1];
|
||||||
|
const FDTensor& bbox_preds = tensors[0];
|
||||||
|
|
||||||
|
const float* structure_probs_data =
|
||||||
|
reinterpret_cast<const float*>(structure_probs.Data());
|
||||||
|
size_t structure_probs_length =
|
||||||
|
accumulate(structure_probs.shape.begin() + 1, structure_probs.shape.end(),
|
||||||
|
1, std::multiplies<int>());
|
||||||
|
|
||||||
|
const float* bbox_preds_data =
|
||||||
|
reinterpret_cast<const float*>(bbox_preds.Data());
|
||||||
|
size_t bbox_preds_length =
|
||||||
|
accumulate(bbox_preds.shape.begin() + 1, bbox_preds.shape.end(), 1,
|
||||||
|
std::multiplies<int>());
|
||||||
|
size_t batch = bbox_preds.shape[0];
|
||||||
|
size_t slice_dim = bbox_preds.shape[1];
|
||||||
|
size_t prob_dim = structure_probs.shape[2];
|
||||||
|
size_t box_dim = bbox_preds.shape[2];
|
||||||
|
|
||||||
|
bbox_batch_list->resize(batch);
|
||||||
|
structure_batch_list->resize(batch);
|
||||||
|
|
||||||
|
for (int i_batch = 0; i_batch < batch; ++i_batch) {
|
||||||
|
SingleBatchPostprocessor(
|
||||||
|
structure_probs_data, bbox_preds_data, slice_dim, prob_dim, box_dim,
|
||||||
|
batch_det_img_info[i_batch][0], batch_det_img_info[i_batch][1],
|
||||||
|
&bbox_batch_list->at(i_batch), &structure_batch_list->at(i_batch));
|
||||||
|
structure_probs_data = structure_probs_data + structure_probs_length;
|
||||||
|
bbox_preds_data = bbox_preds_data + bbox_preds_length;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
@@ -0,0 +1,71 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "fastdeploy/vision/common/processors/transform.h"
|
||||||
|
#include "fastdeploy/vision/common/result.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
|
||||||
|
namespace ocr {
|
||||||
|
/*! @brief Postprocessor object for DBDetector serials model.
|
||||||
|
*/
|
||||||
|
class FASTDEPLOY_DECL StructureV2TablePostprocessor {
|
||||||
|
public:
|
||||||
|
StructureV2TablePostprocessor();
|
||||||
|
/** \brief Create a postprocessor instance for Recognizer serials model
|
||||||
|
*
|
||||||
|
* \param[in] label_path The path of label_dict
|
||||||
|
*/
|
||||||
|
explicit StructureV2TablePostprocessor(const std::string& dict_path);
|
||||||
|
|
||||||
|
/** \brief Process the result of runtime and fill to RecognizerResult
|
||||||
|
*
|
||||||
|
* \param[in] tensors The inference result from runtime
|
||||||
|
* \param[in] texts The output text results of recognizer
|
||||||
|
* \param[in] rec_scores The output score results of recognizer
|
||||||
|
* \return true if the postprocess successed, otherwise false
|
||||||
|
*/
|
||||||
|
bool Run(const std::vector<FDTensor>& tensors,
|
||||||
|
std::vector<std::vector<std::array<int, 8>>>* bbox_batch_list,
|
||||||
|
std::vector<std::vector<std::string>>* structure_batch_list,
|
||||||
|
const std::vector<std::array<int, 4>>& batch_det_img_info);
|
||||||
|
|
||||||
|
private:
|
||||||
|
PostProcessor util_post_processor_;
|
||||||
|
bool SingleBatchPostprocessor(const float* structure_probs,
|
||||||
|
const float* bbox_preds,
|
||||||
|
size_t slice_dim,
|
||||||
|
size_t prob_dim,
|
||||||
|
size_t box_dim,
|
||||||
|
int img_width,
|
||||||
|
int img_height,
|
||||||
|
std::vector<std::array<int, 8>>* boxes_result,
|
||||||
|
std::vector<std::string>* structure_list_result);
|
||||||
|
|
||||||
|
bool merge_no_span_structure{true};
|
||||||
|
std::vector<std::string> dict_character;
|
||||||
|
std::vector<std::string> td_tokens{"<td>", "<td", "<td></td>"};
|
||||||
|
std::map<std::string, int> dict;
|
||||||
|
int ignore_beg_token_idx;
|
||||||
|
int ignore_end_token_idx;
|
||||||
|
int dict_end_idx;
|
||||||
|
bool initialized_ = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
105
fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.cc
Normal file
105
fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.cc
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h"
|
||||||
|
|
||||||
|
#include "fastdeploy/function/concat.h"
|
||||||
|
#include "fastdeploy/utils/perf.h"
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
namespace ocr {
|
||||||
|
|
||||||
|
// Creates the four processing ops used by the table preprocessor. The resize
// and pad ops get placeholder sizes here; the per-image sizes are set in
// StructureV2TableResizeImage.
StructureV2TablePreprocessor::StructureV2TablePreprocessor() {
  std::vector<float> channel_mean = {0.485f, 0.456f, 0.406f};
  std::vector<float> channel_std = {0.229f, 0.224f, 0.225f};
  std::vector<float> pad_value = {0, 0, 0};

  resize_op_ = std::make_shared<Resize>(-1, -1);
  pad_op_ = std::make_shared<Pad>(0, 0, 0, 0, pad_value);
  normalize_op_ = std::make_shared<Normalize>(channel_mean, channel_std, true);
  hwc2chw_op_ = std::make_shared<HWC2CHW>();
}
|
||||||
|
|
||||||
|
// Preprocesses one image in place: resize so the longer side equals max_len
// (aspect ratio kept), normalize, pad to max_len x max_len, then HWC -> CHW.
// Records {original width, original height, resized width, resized height}
// in batch_det_img_info_[batch_idx].
//
// \param[in,out] mat The image to process.
// \param[in] batch_idx Slot of this image in batch_det_img_info_; the caller
//            must have sized the vector beforehand.
void StructureV2TablePreprocessor::StructureV2TableResizeImage(FDMat* mat,
                                                               int batch_idx) {
  float width = float(mat->Width());
  float height = float(mat->Height());
  // Scale factor that maps the longer side onto max_len.
  float ratio = max_len / (std::max(height, width) * 1.0);
  int resize_h = int(height * ratio);
  int resize_w = int(width * ratio);

  resize_op_->SetWidthAndHeight(resize_w, resize_h);
  (*resize_op_)(mat);

  (*normalize_op_)(mat);
  // Pad up to a square of max_len; the padding amounts are the second and
  // fourth arguments.
  pad_op_->SetPaddingSize(0, int(max_len - resize_h), 0,
                          int(max_len - resize_w));
  (*pad_op_)(mat);

  (*hwc2chw_op_)(mat);
  batch_det_img_info_[batch_idx] = {int(width), int(height), resize_w,
                                    resize_h};
}
|
||||||
|
|
||||||
|
bool StructureV2TablePreprocessor::Run(std::vector<FDMat>* images,
|
||||||
|
std::vector<FDTensor>* outputs,
|
||||||
|
size_t start_index, size_t end_index,
|
||||||
|
const std::vector<int>& indices) {
|
||||||
|
if (images->size() == 0 || end_index <= start_index ||
|
||||||
|
end_index > images->size()) {
|
||||||
|
FDERROR << "images->size() or index error. Correct is: 0 <= start_index < "
|
||||||
|
"end_index <= images->size()"
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<FDMat> mats(end_index - start_index);
|
||||||
|
for (size_t i = start_index; i < end_index; ++i) {
|
||||||
|
size_t real_index = i;
|
||||||
|
if (indices.size() != 0) {
|
||||||
|
real_index = indices[i];
|
||||||
|
}
|
||||||
|
mats[i - start_index] = images->at(real_index);
|
||||||
|
}
|
||||||
|
return Run(&mats, outputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preprocesses every image of the batch in place and exposes the batch
// tensor as the single output tensor, sharing its data buffer.
bool StructureV2TablePreprocessor::Apply(FDMatBatch* image_batch,
                                         std::vector<FDTensor>* outputs) {
  // One image-info slot per image, rebuilt on every call.
  batch_det_img_info_.clear();
  batch_det_img_info_.resize(image_batch->mats->size());

  size_t idx = 0;
  while (idx < image_batch->mats->size()) {
    StructureV2TableResizeImage(&(image_batch->mats->at(idx)), idx);
    ++idx;
  }

  // Only have 1 output Tensor.
  outputs->resize(1);
  // Get the NCHW tensor
  FDTensor* batch_tensor = image_batch->Tensor();
  FDTensor& out = (*outputs)[0];
  out.SetExternalData(batch_tensor->Shape(), batch_tensor->Dtype(),
                      batch_tensor->Data(), batch_tensor->device,
                      batch_tensor->device_id);
  return true;
}
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
74
fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h
Normal file
74
fastdeploy/vision/ocr/ppocr/structurev2_table_preprocessor.h
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include "fastdeploy/vision/common/processors/transform.h"
|
||||||
|
#include "fastdeploy/vision/common/processors/manager.h"
|
||||||
|
#include "fastdeploy/vision/common/result.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
|
||||||
|
namespace ocr {
|
||||||
|
/*! @brief Preprocessor object for table model.
|
||||||
|
*/
|
||||||
|
class FASTDEPLOY_DECL StructureV2TablePreprocessor : public ProcessorManager {
|
||||||
|
public:
|
||||||
|
StructureV2TablePreprocessor();
|
||||||
|
using ProcessorManager::Run;
|
||||||
|
/** \brief Process the input image and prepare input tensors for runtime
|
||||||
|
*
|
||||||
|
* \param[in] images The input data list, all the elements are FDMat
|
||||||
|
* \param[in] outputs The output tensors which will be fed into runtime
|
||||||
|
* \return true if the preprocess successed, otherwise false
|
||||||
|
*/
|
||||||
|
bool Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs,
|
||||||
|
size_t start_index, size_t end_index,
|
||||||
|
const std::vector<int>& indices);
|
||||||
|
|
||||||
|
/** \brief Implement the virtual function of ProcessorManager, Apply() is the
|
||||||
|
* body of Run(). Apply() contains the main logic of preprocessing, Run() is
|
||||||
|
* called by users to execute preprocessing
|
||||||
|
*
|
||||||
|
* \param[in] image_batch The input image batch
|
||||||
|
* \param[in] outputs The output tensors which will feed in runtime
|
||||||
|
* \return true if the preprocess successed, otherwise false
|
||||||
|
*/
|
||||||
|
virtual bool Apply(FDMatBatch* image_batch, std::vector<FDTensor>* outputs);
|
||||||
|
|
||||||
|
/// Get the image info of the last batch, return a list of array
|
||||||
|
/// {image width, image height, resize width, resize height}
|
||||||
|
const std::vector<std::array<int, 4>>* GetBatchImgInfo() {
|
||||||
|
return &batch_det_img_info_;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void StructureV2TableResizeImage(FDMat* mat, int batch_idx);
|
||||||
|
// for recording the switch of hwc2chw
|
||||||
|
bool disable_permute_ = false;
|
||||||
|
// for recording the switch of normalize
|
||||||
|
bool disable_normalize_ = false;
|
||||||
|
int max_len = 488;
|
||||||
|
std::vector<int> rec_image_shape_ = {3, max_len, max_len};
|
||||||
|
bool static_shape_infer_ = false;
|
||||||
|
std::shared_ptr<Resize> resize_op_;
|
||||||
|
std::shared_ptr<Pad> pad_op_;
|
||||||
|
std::shared_ptr<Normalize> normalize_op_;
|
||||||
|
std::shared_ptr<HWC2CHW> hwc2chw_op_;
|
||||||
|
std::vector<std::array<int, 4>> batch_det_img_info_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
89
fastdeploy/vision/ocr/ppocr/utils/matcher.cc
Normal file
89
fastdeploy/vision/ocr/ppocr/utils/matcher.cc
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
namespace vision {
|
||||||
|
namespace ocr {
|
||||||
|
|
||||||
|
// Collapse a quadrilateral stored as four interleaved (x, y) corner pairs
// into its axis-aligned bounding box.
//
// box: {x0, y0, x1, y1, x2, y2, x3, y3}
// returns: {left, top, right, bottom}
std::vector<int> Xyxyxyxy2Xyxy(std::array<int, 8> &box) {
  // Even slots carry x coordinates, odd slots carry y coordinates.
  const int left = std::min({box[0], box[2], box[4], box[6]});
  const int top = std::min({box[1], box[3], box[5], box[7]});
  const int right = std::max({box[0], box[2], box[4], box[6]});
  const int bottom = std::max({box[1], box[3], box[5], box[7]});
  return std::vector<int>{left, top, right, bottom};
}
|
||||||
|
|
||||||
|
// Distance score between two boxes given as {x1, y1, x2, y2}.
//
// The score is the L1 distance over all four coordinates plus the smaller of
// the two per-corner L1 distances (top-left vs. bottom-right).
float Dis(std::vector<int> &box1, std::vector<int> &box2) {
  // Per-coordinate absolute gaps, computed in float like the rest of the sum.
  const float gap_x1 = std::abs(float(box2[0]) - float(box1[0]));
  const float gap_y1 = std::abs(float(box2[1]) - float(box1[1]));
  const float gap_x2 = std::abs(float(box2[2]) - float(box1[2]));
  const float gap_y2 = std::abs(float(box2[3]) - float(box1[3]));

  const float top_left = gap_x1 + gap_y1;
  const float bottom_right = gap_x2 + gap_y2;

  // Accumulate left-to-right: ((x1 + y1) + x2) + y2.
  const float all_corners = top_left + gap_x2 + gap_y2;
  return all_corners + std::min(top_left, bottom_right);
}
|
||||||
|
|
||||||
|
// Intersection-over-union of two axis-aligned boxes given as
// {x1, y1, x2, y2}.
//
// Returns 0 when the boxes do not overlap (touching edges count as no
// overlap); otherwise intersection / union, with a tiny epsilon added to the
// denominator to avoid division by zero.
//
// All area arithmetic is done in 64-bit integers: width * height of a large
// box (e.g. 50000 x 50000) does not fit in a 32-bit int, and signed overflow
// is undefined behaviour.
float Iou(std::vector<int> &box1, std::vector<int> &box2) {
  // Degenerate (inverted) boxes are clamped to zero width / height.
  const long long area1 =
      std::max(0LL, static_cast<long long>(box1[2]) - box1[0]) *
      std::max(0LL, static_cast<long long>(box1[3]) - box1[1]);
  const long long area2 =
      std::max(0LL, static_cast<long long>(box2[2]) - box2[0]) *
      std::max(0LL, static_cast<long long>(box2[3]) - box2[1]);

  // computing the sum_area
  const long long sum_area = area1 + area2;

  // find the each point of intersect rectangle
  const int x1 = std::max(box1[0], box2[0]);
  const int y1 = std::max(box1[1], box2[1]);
  const int x2 = std::min(box1[2], box2[2]);
  const int y2 = std::min(box1[3], box2[3]);

  // judge if there is an intersect
  if (y1 >= y2 || x1 >= x2) {
    return 0.0f;
  }

  const long long intersect = (static_cast<long long>(x2) - x1) *
                              (static_cast<long long>(y2) - y1);
  // The division is carried out in double and narrowed on return.
  return static_cast<float>(intersect /
                            (sum_area - intersect + 0.00000001));
}
|
||||||
|
|
||||||
|
// Strict-weak-ordering predicate for distance records.
//
// Records are ordered by element [1] first; ties on element [1] are broken
// by element [0]. Both vectors must hold at least two elements.
bool ComparisonDis(const std::vector<float> &dis1,
                   const std::vector<float> &dis2) {
  // Primary key differs: it alone decides the order.
  if (dis1[1] != dis2[1]) {
    return dis1[1] < dis2[1];
  }
  // Primary keys are equal: fall back to the secondary key.
  return dis1[0] < dis2[0];
}
|
||||||
|
|
||||||
|
} // namespace ocr
|
||||||
|
} // namespace vision
|
||||||
|
} // namespace fastdeploy
|
@@ -34,6 +34,15 @@ FASTDEPLOY_DECL void SortBoxes(std::vector<std::array<int, 8>>* boxes);
|
|||||||
|
|
||||||
FASTDEPLOY_DECL std::vector<int> ArgSort(const std::vector<float> &array);
|
FASTDEPLOY_DECL std::vector<int> ArgSort(const std::vector<float> &array);
|
||||||
|
|
||||||
|
FASTDEPLOY_DECL std::vector<int> Xyxyxyxy2Xyxy(std::array<int, 8> &box);
|
||||||
|
|
||||||
|
FASTDEPLOY_DECL float Dis(std::vector<int> &box1, std::vector<int> &box2);
|
||||||
|
|
||||||
|
FASTDEPLOY_DECL float Iou(std::vector<int> &box1, std::vector<int> &box2);
|
||||||
|
|
||||||
|
FASTDEPLOY_DECL bool ComparisonDis(const std::vector<float> &dis1,
|
||||||
|
const std::vector<float> &dis2);
|
||||||
|
|
||||||
} // namespace ocr
|
} // namespace ocr
|
||||||
} // namespace vision
|
} // namespace vision
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -648,6 +648,107 @@ class Recognizer(FastDeployModel):
|
|||||||
self._model.preprocessor.rec_image_shape = value
|
self._model.preprocessor.rec_image_shape = value
|
||||||
|
|
||||||
|
|
||||||
|
class StructureV2TablePreprocessor:
    def __init__(self):
        """Build the preprocessor used by StructureV2TableModel.

        Wraps the C.vision.ocr.StructureV2TablePreprocessor binding.
        """
        self._preprocessor = C.vision.ocr.StructureV2TablePreprocessor()

    def run(self, input_ims):
        """Run preprocessing on the input images for StructureV2TableModel.

        :param input_ims: (list of numpy.ndarray) The input images
        :return: list of FDTensor
        """
        tensors = self._preprocessor.run(input_ims)
        return tensors
|
||||||
|
|
||||||
|
|
||||||
|
class StructureV2TablePostprocessor:
    def __init__(self):
        """Build the postprocessor used by StructureV2TableModel.

        Wraps the C.vision.ocr.StructureV2TablePostprocessor binding.
        """
        self._postprocessor = C.vision.ocr.StructureV2TablePostprocessor()

    def run(self, runtime_results):
        """Run postprocessing on the runtime outputs of StructureV2TableModel.

        :param runtime_results: (list of FDTensor or list of pyArray) The output FDTensor results from runtime
        :return: list of Result (if runtime_results was predicted from batched samples, the length of this list equals the batch size)
        """
        results = self._postprocessor.run(runtime_results)
        return results
|
||||||
|
|
||||||
|
|
||||||
|
class StructureV2Table(FastDeployModel):
    def __init__(self,
                 model_file="",
                 params_file="",
                 table_char_dict_path="",
                 runtime_option=None,
                 model_format=ModelFormat.PADDLE):
        """Load OCR StructureV2Table model provided by PaddleOCR.

        :param model_file: (str)Path of model file, e.g. the .pdmodel file of en_ppstructure_mobile_v2.0_SLANet. If empty, an uninitialized model is created and predict()/batch_predict() return False.
        :param params_file: (str)Path of parameter file, e.g. the .pdiparams file of en_ppstructure_mobile_v2.0_SLANet, if the model format is ONNX, this parameter will be ignored.
        :param table_char_dict_path: (str)Path of the table character dictionary file, forwarded to the underlying model.
        :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU.
        :param model_format: (fastdeploy.ModelFormat)Model format of the loaded model.
        """
        super(StructureV2Table, self).__init__(runtime_option)

        if (len(model_file) == 0):
            # No model given: keep a placeholder object and refuse to predict.
            self._model = C.vision.ocr.StructureV2Table()
            self._runnable = False
        else:
            self._model = C.vision.ocr.StructureV2Table(
                model_file, params_file, table_char_dict_path,
                self._runtime_option, model_format)
            assert self.initialized, "StructureV2Table initialize failed."
            self._runnable = True

    def clone(self):
        """Clone OCR StructureV2Table model object

        :return: a new OCR StructureV2Table model object
        """

        class StructureV2TableClone(StructureV2Table):
            def __init__(self, model):
                # Deliberately skips StructureV2Table.__init__: the underlying
                # model is already built, it only needs to be wrapped.
                self._model = model

        clone_model = StructureV2TableClone(self._model.clone())
        # predict()/batch_predict() read _runnable, which the lightweight
        # clone constructor does not set; carry it over from this object.
        clone_model._runnable = self._runnable
        return clone_model

    def predict(self, input_image):
        """Predict an input image

        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
        :return: bbox, structure; False if the model is not runnable
        """
        if self._runnable:
            return self._model.predict(input_image)
        return False

    def batch_predict(self, images):
        """Predict a batch of input image

        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
        :return: list of bbox list, list of structure; False if the model is not runnable
        """
        if self._runnable:
            return self._model.batch_predict(images)
        return False

    @property
    def preprocessor(self):
        """The preprocessor object held by the underlying model."""
        return self._model.preprocessor

    @preprocessor.setter
    def preprocessor(self, value):
        self._model.preprocessor = value

    @property
    def postprocessor(self):
        """The postprocessor object held by the underlying model."""
        return self._model.postprocessor

    @postprocessor.setter
    def postprocessor(self, value):
        self._model.postprocessor = value
|
||||||
|
|
||||||
|
|
||||||
class PPOCRv3(FastDeployModel):
|
class PPOCRv3(FastDeployModel):
|
||||||
def __init__(self, det_model=None, cls_model=None, rec_model=None):
|
def __init__(self, det_model=None, cls_model=None, rec_model=None):
|
||||||
"""Consruct a pipeline with text detector, direction classifier and text recognizer models
|
"""Consruct a pipeline with text detector, direction classifier and text recognizer models
|
||||||
@@ -800,3 +901,58 @@ class PPOCRSystemv2(PPOCRv2):
|
|||||||
|
|
||||||
def predict(self, input_image):
|
def predict(self, input_image):
|
||||||
return super(PPOCRSystemv2, self).predict(input_image)
|
return super(PPOCRSystemv2, self).predict(input_image)
|
||||||
|
|
||||||
|
|
||||||
|
class PPStructureV2Table(FastDeployModel):
    def __init__(self, det_model=None, rec_model=None, table_model=None):
        """Construct a pipeline from a text detector, a text recognizer and a table recognizer.

        All three models are required; passing None for any of them fails
        the assertion below.

        :param det_model: (FastDeployModel) The detection model object created by fastdeploy.vision.ocr.DBDetector.
        :param rec_model: (FastDeployModel) The recognition model object created by fastdeploy.vision.ocr.Recognizer.
        :param table_model: (FastDeployModel) The table recognition model object created by fastdeploy.vision.ocr.StructureV2Table.
        """
        models = (det_model, rec_model, table_model)
        assert all(
            m is not None for m in models
        ), "The det_model, rec_model and table_model cannot be None."
        # The pipeline binding takes the underlying model objects,
        # not the Python wrappers.
        det, rec, table = (m._model for m in models)
        self.system_ = C.vision.ocr.PPStructureV2Table(det, rec, table)

    def clone(self):
        """Clone PPStructureV2Table pipeline object

        :return: a new PPStructureV2Table pipeline object
        """

        class PPStructureV2TableClone(PPStructureV2Table):
            def __init__(self, system):
                # Wrap an already-built pipeline; no models are re-validated.
                self.system_ = system

        return PPStructureV2TableClone(self.system_.clone())

    def predict(self, input_image):
        """Run the pipeline on a single image.

        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
        :return: OCRResult
        """
        result = self.system_.predict(input_image)
        return result

    def batch_predict(self, images):
        """Run the pipeline on a batch of images.

        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
        :return: OCRBatchResult
        """
        batch_result = self.system_.batch_predict(images)
        return batch_result
|
||||||
|
|
||||||
|
|
||||||
|
class PPStructureV2TableSystem(PPStructureV2Table):
    """Deprecated alias of PPStructureV2Table; logs a warning when constructed."""

    def __init__(self, det_model=None, rec_model=None, table_model=None):
        deprecation_notice = (
            "DEPRECATED: fd.vision.ocr.PPStructureV2TableSystem is deprecated, "
            "please use fd.vision.ocr.PPStructureV2Table instead.")
        logging.warning(deprecation_notice)
        # Construction itself is delegated unchanged to the parent pipeline.
        parent = super(PPStructureV2TableSystem, self)
        parent.__init__(det_model, rec_model, table_model)

    def predict(self, input_image):
        """Forward to PPStructureV2Table.predict with the same image."""
        parent = super(PPStructureV2TableSystem, self)
        return parent.predict(input_image)
|
||||||
|
Reference in New Issue
Block a user