// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include "fastdeploy/pybind/main.h" namespace fastdeploy { void BindPPOCRModel(pybind11::module& m) { m.def("sort_boxes", [](std::vector>& boxes) { vision::ocr::SortBoxes(&boxes); return boxes; }); // DBDetector pybind11::class_(m, "DBDetectorPreprocessor") .def(pybind11::init<>()) .def_property("static_shape_infer", &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer, &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer) .def_property("max_side_len", &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen, &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen) .def("set_normalize", [](vision::ocr::DBDetectorPreprocessor& self, const std::vector& mean, const std::vector& std, bool is_scale) { self.SetNormalize(mean, std, is_scale); }) .def("run", [](vision::ocr::DBDetectorPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; self.Run(&images, &outputs); auto batch_det_img_info = self.GetBatchImgInfo(); for (size_t i = 0; i < outputs.size(); ++i) { outputs[i].StopSharing(); } return std::make_pair(outputs, *batch_det_img_info); }) .def("disable_normalize", [](vision::ocr::DBDetectorPreprocessor& self) { self.DisableNormalize(); }) .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor& self) { self.DisablePermute(); }); pybind11::class_( m, "DBDetectorPostprocessor") .def(pybind11::init<>()) .def_property("det_db_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh) .def_property("det_db_box_thresh", &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh, &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh) .def_property("det_db_unclip_ratio", &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio, &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio) .def_property("det_db_score_mode", &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode, &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode) .def_property("use_dilation", &vision::ocr::DBDetectorPostprocessor::GetUseDilation, &vision::ocr::DBDetectorPostprocessor::SetUseDilation) .def("run", [](vision::ocr::DBDetectorPostprocessor& self, std::vector& inputs, const std::vector>& batch_det_img_info) { std::vector>> results; if (!self.Run(inputs, &results, batch_det_img_info)) { throw std::runtime_error( "Failed to preprocess the input data in " "DBDetectorPostprocessor."); } return results; }) .def("run", [](vision::ocr::DBDetectorPostprocessor& self, std::vector& input_array, const std::vector>& batch_det_img_info) { std::vector>> results; std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); if (!self.Run(inputs, &results, batch_det_img_info)) { throw std::runtime_error( "Failed to preprocess the input data in " "DBDetectorPostprocessor."); } return results; }); pybind11::class_(m, "DBDetector") .def(pybind11::init()) .def(pybind11::init<>()) .def_property_readonly("preprocessor", &vision::ocr::DBDetector::GetPreprocessor) .def_property_readonly("postprocessor", &vision::ocr::DBDetector::GetPostprocessor) .def("predict", [](vision::ocr::DBDetector& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); vision::OCRResult ocr_result; self.Predict(mat, &ocr_result); return ocr_result; }) .def("batch_predict", [](vision::ocr::DBDetector& self, std::vector& data) { std::vector images; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } std::vector ocr_results; self.BatchPredict(images, &ocr_results); return ocr_results; }); // Classifier pybind11::class_(m, "ClassifierPreprocessor") .def(pybind11::init<>()) .def_property("cls_image_shape", &vision::ocr::ClassifierPreprocessor::GetClsImageShape, &vision::ocr::ClassifierPreprocessor::SetClsImageShape) .def("set_normalize", [](vision::ocr::ClassifierPreprocessor& self, const std::vector& mean, const std::vector& std, bool is_scale) { self.SetNormalize(mean, std, is_scale); }) .def("run", [](vision::ocr::ClassifierPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; if (!self.Run(&images, &outputs)) { throw std::runtime_error( "Failed to preprocess the input data in " "ClassifierPreprocessor."); } for (size_t i = 0; i < outputs.size(); ++i) { outputs[i].StopSharing(); } return outputs; }) .def("disable_normalize", [](vision::ocr::ClassifierPreprocessor& self) { self.DisableNormalize(); }) .def("disable_permute", [](vision::ocr::ClassifierPreprocessor& self) { self.DisablePermute(); }); pybind11::class_( m, "ClassifierPostprocessor") .def(pybind11::init<>()) .def_property("cls_thresh", &vision::ocr::ClassifierPostprocessor::GetClsThresh, &vision::ocr::ClassifierPostprocessor::SetClsThresh) .def("run", [](vision::ocr::ClassifierPostprocessor& self, std::vector& inputs) { std::vector cls_labels; std::vector cls_scores; if (!self.Run(inputs, &cls_labels, &cls_scores)) { throw std::runtime_error( "Failed to preprocess the input data in " "ClassifierPostprocessor."); } return std::make_pair(cls_labels, cls_scores); }) .def("run", [](vision::ocr::ClassifierPostprocessor& self, std::vector& input_array) { std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); std::vector cls_labels; std::vector cls_scores; if (!self.Run(inputs, &cls_labels, &cls_scores)) { throw std::runtime_error( "Failed to preprocess the input data in " "ClassifierPostprocessor."); } return std::make_pair(cls_labels, cls_scores); }); pybind11::class_(m, "Classifier") .def(pybind11::init()) .def(pybind11::init<>()) .def_property_readonly("preprocessor", &vision::ocr::Classifier::GetPreprocessor) .def_property_readonly("postprocessor", &vision::ocr::Classifier::GetPostprocessor) .def("predict", [](vision::ocr::Classifier& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); vision::OCRResult ocr_result; self.Predict(mat, &ocr_result); return ocr_result; }) .def("batch_predict", [](vision::ocr::Classifier& self, std::vector& data) { std::vector images; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } vision::OCRResult ocr_result; self.BatchPredict(images, &ocr_result); return ocr_result; }); // Recognizer pybind11::class_(m, "RecognizerPreprocessor") .def(pybind11::init<>()) .def_property("static_shape_infer", &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer, &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer) .def_property("rec_image_shape", &vision::ocr::RecognizerPreprocessor::GetRecImageShape, &vision::ocr::RecognizerPreprocessor::SetRecImageShape) .def("set_normalize", [](vision::ocr::RecognizerPreprocessor& self, const std::vector& mean, const std::vector& std, bool is_scale) { self.SetNormalize(mean, std, is_scale); }) .def("run", [](vision::ocr::RecognizerPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; if (!self.Run(&images, &outputs)) { throw std::runtime_error( "Failed to preprocess the input data in " "RecognizerPreprocessor."); } for (size_t i = 0; i < outputs.size(); ++i) { outputs[i].StopSharing(); } return outputs; }) .def("disable_normalize", [](vision::ocr::RecognizerPreprocessor& self) { self.DisableNormalize(); }) .def("disable_permute", [](vision::ocr::RecognizerPreprocessor& self) { self.DisablePermute(); }); pybind11::class_( m, "RecognizerPostprocessor") .def(pybind11::init()) .def("run", [](vision::ocr::RecognizerPostprocessor& self, std::vector& inputs) { std::vector texts; std::vector rec_scores; if (!self.Run(inputs, &texts, &rec_scores)) { throw std::runtime_error( "Failed to preprocess the input data in " "RecognizerPostprocessor."); } return std::make_pair(texts, rec_scores); }) .def("run", [](vision::ocr::RecognizerPostprocessor& self, std::vector& input_array) { std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); std::vector texts; std::vector rec_scores; if (!self.Run(inputs, &texts, &rec_scores)) { throw std::runtime_error( "Failed to preprocess the input data in " "RecognizerPostprocessor."); } return std::make_pair(texts, rec_scores); }); pybind11::class_(m, "Recognizer") .def(pybind11::init()) .def(pybind11::init<>()) .def_property_readonly("preprocessor", &vision::ocr::Recognizer::GetPreprocessor) .def_property_readonly("postprocessor", &vision::ocr::Recognizer::GetPostprocessor) .def("clone", [](vision::ocr::Recognizer& self) { return self.Clone(); }) .def("predict", [](vision::ocr::Recognizer& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); vision::OCRResult ocr_result; self.Predict(mat, &ocr_result); return ocr_result; }) .def("batch_predict", [](vision::ocr::Recognizer& self, std::vector& data) { std::vector images; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } vision::OCRResult ocr_result; self.BatchPredict(images, &ocr_result); return ocr_result; }); // Table pybind11::class_(m, "StructureV2TablePreprocessor") .def(pybind11::init<>()) .def("run", [](vision::ocr::StructureV2TablePreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; if (!self.Run(&images, &outputs)) { throw std::runtime_error( "Failed to preprocess the input data in " "StructureV2TablePreprocessor."); } auto batch_det_img_info = self.GetBatchImgInfo(); for (size_t i = 0; i < outputs.size(); ++i) { outputs[i].StopSharing(); } return std::make_pair(outputs, *batch_det_img_info); }); pybind11::class_( m, "StructureV2TablePostprocessor") .def(pybind11::init()) .def("run", [](vision::ocr::StructureV2TablePostprocessor& self, std::vector& inputs, const std::vector>& batch_det_img_info) { std::vector>> boxes; std::vector> structure_list; if (!self.Run(inputs, &boxes, &structure_list, batch_det_img_info)) { throw std::runtime_error( "Failed to postprocess the input data in " "StructureV2TablePostprocessor."); } return std::make_pair(boxes, structure_list); }) .def("run", [](vision::ocr::StructureV2TablePostprocessor& self, std::vector& input_array, const std::vector>& batch_det_img_info) { std::vector inputs; PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true); std::vector>> boxes; std::vector> structure_list; if (!self.Run(inputs, &boxes, &structure_list, batch_det_img_info)) { throw std::runtime_error( "Failed to postprocess the input data in " "StructureV2TablePostprocessor."); } return std::make_pair(boxes, structure_list); }); pybind11::class_( m, "StructureV2Table") .def(pybind11::init()) .def(pybind11::init<>()) .def_property_readonly("preprocessor", &vision::ocr::StructureV2Table::GetPreprocessor) .def_property_readonly("postprocessor", &vision::ocr::StructureV2Table::GetPostprocessor) .def("clone", [](vision::ocr::StructureV2Table& self) { return self.Clone(); }) .def("predict", [](vision::ocr::StructureV2Table& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); vision::OCRResult ocr_result; self.Predict(mat, &ocr_result); return ocr_result; }) .def("batch_predict", [](vision::ocr::StructureV2Table& self, std::vector& data) { std::vector images; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } std::vector ocr_results; self.BatchPredict(images, &ocr_results); return ocr_results; }); // Layout pybind11::class_(m, "StructureV2LayoutPreprocessor") .def(pybind11::init<>()) .def_property( "static_shape_infer", &vision::ocr::StructureV2LayoutPreprocessor::GetStaticShapeInfer, &vision::ocr::StructureV2LayoutPreprocessor::SetStaticShapeInfer) .def_property( "layout_image_shape", &vision::ocr::StructureV2LayoutPreprocessor::GetLayoutImageShape, &vision::ocr::StructureV2LayoutPreprocessor::SetLayoutImageShape) .def("set_normalize", [](vision::ocr::StructureV2LayoutPreprocessor& self, const std::vector& mean, const std::vector& std, bool is_scale) { self.SetNormalize(mean, std, is_scale); }) .def("run", [](vision::ocr::StructureV2LayoutPreprocessor& self, std::vector& im_list) { std::vector images; for (size_t i = 0; i < im_list.size(); ++i) { images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i]))); } std::vector outputs; if (!self.Run(&images, &outputs)) { throw std::runtime_error( "Failed to preprocess the input data in " "StructureV2LayoutPreprocessor."); } auto batch_layout_img_info = self.GetBatchLayoutImgInfo(); for (size_t i = 0; i < outputs.size(); ++i) { outputs[i].StopSharing(); } return std::make_pair(outputs, *batch_layout_img_info); }) .def("disable_normalize", [](vision::ocr::StructureV2LayoutPreprocessor& self) { self.DisableNormalize(); }) .def("disable_permute", [](vision::ocr::StructureV2LayoutPreprocessor& self) { self.DisablePermute(); }); pybind11::class_( m, "StructureV2LayoutPostprocessor") .def(pybind11::init<>()) .def_property( "score_threshold", &vision::ocr::StructureV2LayoutPostprocessor::GetScoreThreshold, &vision::ocr::StructureV2LayoutPostprocessor::SetScoreThreshold) .def_property( "nms_threshold", &vision::ocr::StructureV2LayoutPostprocessor::GetNMSThreshold, &vision::ocr::StructureV2LayoutPostprocessor::SetNMSThreshold) .def_property("num_class", &vision::ocr::StructureV2LayoutPostprocessor::GetNumClass, &vision::ocr::StructureV2LayoutPostprocessor::SetNumClass) .def_property("fpn_stride", &vision::ocr::StructureV2LayoutPostprocessor::GetFPNStride, &vision::ocr::StructureV2LayoutPostprocessor::SetFPNStride) .def_property("reg_max", &vision::ocr::StructureV2LayoutPostprocessor::GetRegMax, &vision::ocr::StructureV2LayoutPostprocessor::SetRegMax) .def("run", [](vision::ocr::StructureV2LayoutPostprocessor& self, std::vector& inputs, const std::vector>& batch_layout_img_info) { std::vector results; if (!self.Run(inputs, &results, batch_layout_img_info)) { throw std::runtime_error( "Failed to postprocess the input data in " "StructureV2LayoutPostprocessor."); } return results; }); pybind11::class_( m, "StructureV2Layout") .def(pybind11::init()) .def(pybind11::init<>()) .def_property_readonly("preprocessor", &vision::ocr::StructureV2Layout::GetPreprocessor) .def_property_readonly("postprocessor", &vision::ocr::StructureV2Layout::GetPostprocessor) .def("clone", [](vision::ocr::StructureV2Layout& self) { return self.Clone(); }) .def("predict", [](vision::ocr::StructureV2Layout& self, pybind11::array& data) { auto mat = PyArrayToCvMat(data); vision::DetectionResult result; self.Predict(mat, &result); return result; }) .def("batch_predict", [](vision::ocr::StructureV2Layout& self, std::vector& data) { std::vector images; for (size_t i = 0; i < data.size(); ++i) { images.push_back(PyArrayToCvMat(data[i])); } std::vector results; self.BatchPredict(images, &results); return results; }); } } // namespace fastdeploy