[Model] Support PP-StructureV2-Layout model (#1867)

* [Model] init pp-structurev2-layout code
* [Model] init pp-structurev2-layout code
* [Model] init pp-structurev2-layout code
* [Model] add structurev2_layout_preprocessor
* [PP-StructureV2] add postprocessor and layout detector class
* [PP-StructureV2] add postprocessor and layout detector class
* [PP-StructureV2] add postprocessor and layout detector class
* [PP-StructureV2] add postprocessor and layout detector class
* [PP-StructureV2] add postprocessor and layout detector class
* [pybind] add pp-structurev2-layout model pybind
* [pybind] add pp-structurev2-layout model pybind
* [Bug Fix] fixed code style
* [examples] add pp-structurev2-layout c++ examples
* [PP-StructureV2] add python example and docs
* [benchmark] add pp-structurev2-layout benchmark support
2025-10-08 10:00:29 +08:00 · 2023-05-05 13:05:58 +08:00
parent 2c5fd91a7f
commit 6d0261e9e4
26 changed files with 1255 additions and 23 deletions
--- a/python/fastdeploy/vision/ocr/ppocr/__init__.py
+++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py
@@ -650,7 +650,7 @@ class Recognizer(FastDeployModel):

 class StructureV2TablePreprocessor:
    def __init__(self):
-        """Create a preprocessor for StructureV2TableModel
+        """Create a preprocessor for StructureV2Table Model
        """
        self._preprocessor = C.vision.ocr.StructureV2TablePreprocessor()

@@ -664,12 +664,12 @@ class StructureV2TablePreprocessor:

 class StructureV2TablePostprocessor:
    def __init__(self):
-        """Create a postprocessor for StructureV2TableModel
+        """Create a postprocessor for StructureV2Table Model
        """
        self._postprocessor = C.vision.ocr.StructureV2TablePostprocessor()

    def run(self, runtime_results):
-        """Postprocess the runtime results for StructureV2TableModel
+        """Postprocess the runtime results for StructureV2Table Model
        :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime
        :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size)
        """
@@ -683,10 +683,11 @@ class StructureV2Table(FastDeployModel):
                 table_char_dict_path="",
                 runtime_option=None,
                 model_format=ModelFormat.PADDLE):
-        """Load OCR StructureV2Table model provided by PaddleOCR.
+        """Load StructureV2Table model provided by PP-StructureV2.

        :param model_file: (str)Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel.
        :param params_file: (str)Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
+        :param table_char_dict_path: (str)Path of table_char_dict file, e.g ../ppocr/utils/dict/table_structure_dict_ch.txt
        :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU.
        :param model_format: (fastdeploy.ModelForamt)Model format of the loaded model.
        """
@@ -703,8 +704,8 @@ class StructureV2Table(FastDeployModel):
            self._runnable = True

    def clone(self):
-        """Clone OCR StructureV2Table model object
-        :return: a new OCR StructureV2Table model object
+        """Clone StructureV2Table model object
+        :return: a new StructureV2Table model object
        """

        class StructureV2TableClone(StructureV2Table):
@@ -749,6 +750,105 @@ class StructureV2Table(FastDeployModel):
        self._model.postprocessor = value


+class StructureV2LayoutPreprocessor:
+    def __init__(self):
+        """Build the preprocessor used by the StructureV2Layout Model
+
+        All work is delegated to a C.vision.ocr.StructureV2LayoutPreprocessor object.
+        """
+        self._preprocessor = C.vision.ocr.StructureV2LayoutPreprocessor()
+
+    def run(self, input_ims):
+        """Run the StructureV2Layout preprocessing on the given images
+
+        :param: input_ims: (list of numpy.ndarray)The input image
+        :return: list of FDTensor
+        """
+        tensors = self._preprocessor.run(input_ims)
+        return tensors
+
+
+class StructureV2LayoutPostprocessor:
+    def __init__(self):
+        """Build the postprocessor used by the StructureV2Layout Model
+
+        All work is delegated to a C.vision.ocr.StructureV2LayoutPostprocessor object.
+        """
+        self._postprocessor = C.vision.ocr.StructureV2LayoutPostprocessor()
+
+    def run(self, runtime_results):
+        """Turn raw runtime outputs of the StructureV2Layout Model into results
+
+        :param: runtime_results: (list of FDTensor or list of pyArray)The output FDTensor results from runtime
+        :return: list of Result(If the runtime_results is predict by batched samples, the length of this list equals to the batch size)
+        """
+        results = self._postprocessor.run(runtime_results)
+        return results
+
+
+class StructureV2Layout(FastDeployModel):
+    def __init__(self,
+                 model_file="",
+                 params_file="",
+                 runtime_option=None,
+                 model_format=ModelFormat.PADDLE):
+        """Load StructureV2Layout model provided by PP-StructureV2.
+
+        :param model_file: (str)Path of model file, e.g ./picodet_lcnet_x1_0_fgd_layout_infer/model.pdmodel. If it is empty, an unloaded model object is created and predict/batch_predict return False.
+        :param params_file: (str)Path of parameter file, e.g ./picodet_lcnet_x1_0_fgd_layout_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
+        :param runtime_option: (fastdeploy.RuntimeOption)RuntimeOption for inference this model, if it's None, will use the default backend on CPU.
+        :param model_format: (fastdeploy.ModelFormat)Model format of the loaded model.
+        """
+        super(StructureV2Layout, self).__init__(runtime_option)
+
+        # Truthiness check: an empty path (or None) means "no model to load".
+        if not model_file:
+            self._model = C.vision.ocr.StructureV2Layout()
+            self._runnable = False
+        else:
+            self._model = C.vision.ocr.StructureV2Layout(
+                model_file, params_file, self._runtime_option, model_format)
+            # `initialized` is not defined in this class; presumably it is
+            # inherited from FastDeployModel -- confirm against the base class.
+            assert self.initialized, "StructureV2Layout model initialize failed."
+            self._runnable = True
+
+    def clone(self):
+        """Clone StructureV2Layout model object
+        :return: a new StructureV2Layout model object
+        """
+
+        class StructureV2LayoutClone(StructureV2Layout):
+            def __init__(self, model, runnable):
+                # The base-class constructors are not called here: the clone
+                # only wraps the model object returned by self._model.clone().
+                self._model = model
+                # predict()/batch_predict() read this flag, so the clone must
+                # carry it as well.
+                self._runnable = runnable
+
+        return StructureV2LayoutClone(self._model.clone(), self._runnable)
+
+    def predict(self, input_image):
+        """Predict an input image
+        :param input_image: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
+        :return: bboxes, or False if the model was created without a model file
+        """
+        if self._runnable:
+            return self._model.predict(input_image)
+        return False
+
+    def batch_predict(self, images):
+        """Predict a batch of input image
+        :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format
+        :return: list of bboxes list, or False if the model was created without a model file
+        """
+        if self._runnable:
+            return self._model.batch_predict(images)
+        return False
+
+    @property
+    def preprocessor(self):
+        """The preprocessor object held by the underlying model"""
+        return self._model.preprocessor
+
+    @preprocessor.setter
+    def preprocessor(self, value):
+        """Replace the preprocessor object held by the underlying model"""
+        self._model.preprocessor = value
+
+    @property
+    def postprocessor(self):
+        """The postprocessor object held by the underlying model"""
+        return self._model.postprocessor
+
+    @postprocessor.setter
+    def postprocessor(self, value):
+        """Replace the postprocessor object held by the underlying model"""
+        self._model.postprocessor = value
+
+
 class PPOCRv3(FastDeployModel):
    def __init__(self, det_model=None, cls_model=None, rec_model=None):
        """Consruct a pipeline with text detector, direction classifier and text recognizer models
--- a/python/fastdeploy/vision/visualize/__init__.py
+++ b/python/fastdeploy/vision/visualize/__init__.py
@@ -23,7 +23,9 @@ def vis_detection(im_data,
                  labels=[],
                  score_threshold=0.0,
                  line_size=1,
-                  font_size=0.5):
+                  font_size=0.5,
+                  font_color=[255, 255, 255],
+                  font_thickness=1):
    """Show the visualized results for detection models

    :param im_data: (numpy.ndarray)The input image data, 3-D array with layout HWC, BGR format
@@ -32,10 +34,13 @@ def vis_detection(im_data,
    :param score_threshold: (float) score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold
    :param line_size: (float) line_size line size for bounding boxes
    :param font_size: (float) font_size font size for text
+    :param font_color: (list of int) font_color  for text
+    :param font_thickness: (int) font_thickness for text
    :return: (numpy.ndarray) image with visualized results
    """
    return C.vision.vis_detection(im_data, det_result, labels, score_threshold,
-                                  line_size, font_size)
+                                  line_size, font_size, font_color,
+                                  font_thickness)


 def vis_perception(im_data,