[Hackthon_4th 242] Support en_ppstructure_mobile_v2.0_SLANet (#1816)

* first draft * update api name * fix bug * fix bug and * fix bug in c api * fix bug in c_api --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-10-06 09:07:10 +08:00 · 2023-04-27 10:45:14 +08:00
parent ef576ce875
commit 2c5fd91a7f
35 changed files with 2505 additions and 39 deletions
--- a/examples/vision/ocr/PP-OCR/cpu-gpu/python/README.md
+++ b/examples/vision/ocr/PP-OCR/cpu-gpu/python/README.md
@@ -36,10 +36,15 @@ tar -xvf ch_ppocr_mobile_v2.0_cls_infer.tar
 # 下载PP-OCRv3文字识别模型
 wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
 tar -xvf ch_PP-OCRv3_rec_infer.tar
+# 下载PPStructureV2表格识别模型
+wget https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar
+tar xf ch_ppstructure_mobile_v2.0_SLANet_infer.tar

 # 下载预测图片与字典文件
 wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/doc/imgs/12.jpg
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppstructure/docs/table/table.jpg
 wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/ppocr_keys_v1.txt
+wget https://gitee.com/paddlepaddle/PaddleOCR/raw/release/2.6/ppocr/utils/dict/table_structure_dict_ch.txt

 # 运行部署示例
 # 在CPU上使用Paddle Inference推理
@@ -71,6 +76,8 @@ python infer_cls.py --cls_model ch_ppocr_mobile_v2.0_cls_infer --image 12.jpg --
 # 在CPU上,单独使用文字识别模型部署
 python infer_rec.py  --rec_model ch_PP-OCRv3_rec_infer --rec_label_file ppocr_keys_v1.txt --image 12.jpg --device cpu

+# 在CPU上,单独使用文字识别模型部署
+python infer_structurev2_table.py  --table_model ./ch_ppstructure_mobile_v2.0_SLANet_infer --table_char_dict_path ./table_structure_dict_ch.txt --image table.jpg --device cpu
 ```

 运行完成可视化结果如下图所示
--- a/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py
+++ b/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_ppstructurev2_table.py
@@ -0,0 +1,175 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--det_model", required=True, help="Path of Detection model of PPOCR.")
+    parser.add_argument(
+        "--rec_model",
+        required=True,
+        help="Path of Recognization model of PPOCR.")
+    parser.add_argument(
+        "--table_model",
+        required=True,
+        help="Path of Table recognition model of PPOCR.")
+    parser.add_argument(
+        "--rec_label_file",
+        required=True,
+        help="Path of Recognization model of PPOCR.")
+    parser.add_argument(
+        "--table_char_dict_path",
+        type=str,
+        required=True,
+        help="tabel recognition dict path.")
+    parser.add_argument(
+        "--rec_bs",
+        type=int,
+        default=6,
+        help="Recognition model inference batch size")
+    parser.add_argument(
+        "--image", type=str, required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--device_id",
+        type=int,
+        default=0,
+        help="Define which GPU card used to run model.")
+    parser.add_argument(
+        "--backend",
+        type=str,
+        default="default",
+        help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu"
+    )
+
+    return parser.parse_args()
+
+
+def build_option(args):
+    det_option = fd.RuntimeOption()
+    rec_option = fd.RuntimeOption()
+    table_option = fd.RuntimeOption()
+
+    if args.device.lower() == "gpu":
+        det_option.use_gpu(args.device_id)
+        rec_option.use_gpu(args.device_id)
+        table_option.use_gpu(args.device_id)
+
+    if args.backend.lower() == "trt":
+        assert args.device.lower(
+        ) == "gpu", "TensorRT backend require inference on device GPU."
+        det_option.use_trt_backend()
+        rec_option.use_trt_backend()
+        table_option.use_trt_backend()
+
+        # If use TRT backend, the dynamic shape will be set as follow.
+        # We recommend that users set the length and height of the detection model to a multiple of 32.
+        # We also recommend that users set the Trt input shape as follow.
+        det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640],
+                                       [1, 3, 960, 960])
+
+        rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                                       [args.rec_bs, 3, 48, 320],
+                                       [args.rec_bs, 3, 48, 2304])
+
+        table_option.set_trt_input_shape("x", [1, 3, 488, 488])
+
+        # Users could save TRT cache file to disk as follow.
+        det_option.set_trt_cache_file(args.det_model + "/det_trt_cache.trt")
+        rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
+        table_option.set_trt_cache_file(args.table_model +
+                                        "/table_trt_cache.trt")
+
+    elif args.backend.lower() == "ort":
+        det_option.use_ort_backend()
+        rec_option.use_ort_backend()
+        table_option.use_ort_backend()
+
+    elif args.backend.lower() == "paddle":
+        det_option.use_paddle_infer_backend()
+        rec_option.use_paddle_infer_backend()
+        table_option.use_paddle_infer_backend()
+
+    elif args.backend.lower() == "openvino":
+        assert args.device.lower(
+        ) == "cpu", "OpenVINO backend require inference on device CPU."
+        det_option.use_openvino_backend()
+        rec_option.use_openvino_backend()
+        table_option.use_openvino_backend()
+
+    return det_option, rec_option, table_option
+
+
+args = parse_arguments()
+
+det_model_file = os.path.join(args.det_model, "inference.pdmodel")
+det_params_file = os.path.join(args.det_model, "inference.pdiparams")
+
+rec_model_file = os.path.join(args.rec_model, "inference.pdmodel")
+rec_params_file = os.path.join(args.rec_model, "inference.pdiparams")
+rec_label_file = args.rec_label_file
+
+table_model_file = os.path.join(args.table_model, "inference.pdmodel")
+table_params_file = os.path.join(args.table_model, "inference.pdiparams")
+table_char_dict_path = args.table_char_dict_path
+
+# Set the runtime option
+det_option, rec_option, table_option = build_option(args)
+
+det_model = fd.vision.ocr.DBDetector(
+    det_model_file, det_params_file, runtime_option=det_option)
+
+rec_model = fd.vision.ocr.Recognizer(
+    rec_model_file, rec_params_file, rec_label_file, runtime_option=rec_option)
+
+table_model = fd.vision.ocr.StructureV2Table(
+    table_model_file,
+    table_params_file,
+    table_char_dict_path,
+    runtime_option=table_option)
+
+det_model.preprocessor.max_side_len = 960
+det_model.postprocessor.det_db_thresh = 0.3
+det_model.postprocessor.det_db_box_thresh = 0.6
+det_model.postprocessor.det_db_unclip_ratio = 1.5
+det_model.postprocessor.det_db_score_mode = "slow"
+det_model.postprocessor.use_dilation = False
+
+ppstructurev2_table = fd.vision.ocr.PPStructureV2Table(
+    det_model=det_model, rec_model=rec_model, table_model=table_model)
+
+ppstructurev2_table.rec_batch_size = args.rec_bs
+
+# Read the input image
+im = cv2.imread(args.image)
+
+# Predict and reutrn the results
+result = ppstructurev2_table.predict(im)
+
+print(result)
+
+# Visuliaze the results.
+vis_im = fd.vision.vis_ppocr(im, result)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
--- a/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py
+++ b/examples/vision/ocr/PP-OCR/cpu-gpu/python/infer_structurev2_table.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--table_model",
+        required=True,
+        help="Path of Table recognition model of PPOCR.")
+    parser.add_argument(
+        "--table_char_dict_path",
+        type=str,
+        required=True,
+        help="tabel recognition dict path.")
+    parser.add_argument(
+        "--image", type=str, required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--device_id",
+        type=int,
+        default=0,
+        help="Define which GPU card used to run model.")
+
+    return parser.parse_args()
+
+
+def build_option(args):
+
+    table_option = fd.RuntimeOption()
+
+    if args.device.lower() == "gpu":
+        table_option.use_gpu(args.device_id)
+
+    return table_option
+
+
+args = parse_arguments()
+
+table_model_file = os.path.join(args.table_model, "inference.pdmodel")
+table_params_file = os.path.join(args.table_model, "inference.pdiparams")
+
+# Set the runtime option
+table_option = build_option(args)
+
+# Create the table_model
+table_model = fd.vision.ocr.StructureV2Table(
+    table_model_file, table_params_file, args.table_char_dict_path,
+    table_option)
+
+# Read the image
+im = cv2.imread(args.image)
+
+# Predict and return the results
+result = table_model.predict(im)
+
+print(result)