diff --git a/benchmark/python/benchmark_ppocr.py b/benchmark/python/benchmark_ppocr.py index e58f6829d..cff4d2e88 100755 --- a/benchmark/python/benchmark_ppocr.py +++ b/benchmark/python/benchmark_ppocr.py @@ -317,6 +317,30 @@ if __name__ == '__main__': runtime_option=rec_option) model = fd.vision.ocr.PPOCRv3( det_model=det_model, cls_model=cls_model, rec_model=rec_model) + elif "OCRv4" in args.model_dir: + det_option = option + if args.backend in ["trt", "paddle_trt"]: + det_option.trt_option.set_shape( + "x", [1, 3, 64, 64], [1, 3, 640, 640], [1, 3, 960, 960]) + det_model = fd.vision.ocr.DBDetector( + det_model_file, det_params_file, runtime_option=det_option) + cls_option = option + if args.backend in ["trt", "paddle_trt"]: + cls_option.trt_option.set_shape( + "x", [1, 3, 48, 10], [10, 3, 48, 320], [64, 3, 48, 1024]) + cls_model = fd.vision.ocr.Classifier( + cls_model_file, cls_params_file, runtime_option=cls_option) + rec_option = option + if args.backend in ["trt", "paddle_trt"]: + rec_option.trt_option.set_shape( + "x", [1, 3, 48, 10], [10, 3, 48, 320], [64, 3, 48, 2304]) + rec_model = fd.vision.ocr.Recognizer( + rec_model_file, + rec_params_file, + rec_label_file, + runtime_option=rec_option) + model = fd.vision.ocr.PPOCRv4( + det_model=det_model, cls_model=cls_model, rec_model=rec_model) else: raise Exception("model {} not support now in ppocr series".format( args.model_dir)) diff --git a/c_api/fastdeploy_capi/vision/types_internal.h b/c_api/fastdeploy_capi/vision/types_internal.h old mode 100644 new mode 100755 index ea83562cf..ec8aaf766 --- a/c_api/fastdeploy_capi/vision/types_internal.h +++ b/c_api/fastdeploy_capi/vision/types_internal.h @@ -32,6 +32,7 @@ #include "fastdeploy/vision/ocr/ppocr/structurev2_table.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h" +#include "fastdeploy/vision/ocr/ppocr/ppocr_v4.h" #include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h" #include 
"fastdeploy/vision/segmentation/ppseg/model.h" @@ -187,6 +188,9 @@ DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv2, ppocrv2_model); // PPOCRv3 DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv3, ppocrv3_model); +// PPOCRv4 +DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPOCRv4, ppocrv4_model); + // PPStructureV2Table DEFINE_PIPELINE_MODEL_WRAPPER_STRUCT(PPStructureV2Table, ppstructurev2table_model); @@ -400,6 +404,9 @@ DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv2, fd_ppocrv2_wrapper // PPOCRv3 DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv3, fd_ppocrv3_wrapper); +// PPOCRv4 +DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPOCRv4, fd_ppocrv4_wrapper); + // PPStructureV2Table DECLARE_PIPELINE_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPStructureV2Table, fd_ppstructurev2_table_wrapper); diff --git a/fastdeploy/vision.h b/fastdeploy/vision.h index 1c302f6d7..13182e0c0 100755 --- a/fastdeploy/vision.h +++ b/fastdeploy/vision.h @@ -59,6 +59,7 @@ #include "fastdeploy/vision/ocr/ppocr/structurev2_layout.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h" #include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h" +#include "fastdeploy/vision/ocr/ppocr/ppocr_v4.h" #include "fastdeploy/vision/ocr/ppocr/ppstructurev2_table.h" #include "fastdeploy/vision/ocr/ppocr/ppstructurev2_layout.h" #include "fastdeploy/vision/ocr/ppocr/recognizer.h" diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index 5897c9df8..8c615ca0e 100644 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -672,8 +672,8 @@ std::string OCRResult::Str() { out = out + "]"; if (rec_scores.size() > 0) { - out = out + "rec text: " + text[n] + " rec score:" + - std::to_string(rec_scores[n]) + " "; + out = out + "rec text: " + text[n] + + " rec score:" + std::to_string(rec_scores[n]) + " "; } if (cls_labels.size() > 0) { out = out + "cls label: " + std::to_string(cls_labels[n]) + @@ -713,8 +713,8 @@ std::string OCRResult::Str() { cls_scores.size() > 
0) { std::string out; for (int i = 0; i < rec_scores.size(); i++) { - out = out + "rec text: " + text[i] + " rec score:" + - std::to_string(rec_scores[i]) + " "; + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; out = out + "cls label: " + std::to_string(cls_labels[i]) + " cls score: " + std::to_string(cls_scores[i]); out = out + "\n"; @@ -733,8 +733,8 @@ std::string OCRResult::Str() { cls_scores.size() == 0) { std::string out; for (int i = 0; i < rec_scores.size(); i++) { - out = out + "rec text: " + text[i] + " rec score:" + - std::to_string(rec_scores[i]) + " "; + out = out + "rec text: " + text[i] + + " rec score:" + std::to_string(rec_scores[i]) + " "; out = out + "\n"; } return out; @@ -781,9 +781,9 @@ std::string HeadPoseResult::Str() { std::string out; out = "HeadPoseResult: [yaw, pitch, roll]\n"; - out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + "pitch: " + - std::to_string(euler_angles[1]) + "\n" + "roll: " + - std::to_string(euler_angles[2]) + "\n"; + out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + + "pitch: " + std::to_string(euler_angles[1]) + "\n" + + "roll: " + std::to_string(euler_angles[2]) + "\n"; return out; } diff --git a/fastdeploy/vision/ocr/ocr_pybind.cc b/fastdeploy/vision/ocr/ocr_pybind.cc old mode 100644 new mode 100755 index c936a6ab0..0005c7242 --- a/fastdeploy/vision/ocr/ocr_pybind.cc +++ b/fastdeploy/vision/ocr/ocr_pybind.cc @@ -17,6 +17,7 @@ namespace fastdeploy { void BindPPOCRModel(pybind11::module& m); +void BindPPOCRv4(pybind11::module& m); void BindPPOCRv3(pybind11::module& m); void BindPPOCRv2(pybind11::module& m); void BindPPStructureV2Table(pybind11::module& m); @@ -24,6 +25,7 @@ void BindPPStructureV2Table(pybind11::module& m); void BindOcr(pybind11::module& m) { auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models"); BindPPOCRModel(ocr_module); + BindPPOCRv4(ocr_module); BindPPOCRv3(ocr_module); BindPPOCRv2(ocr_module); 
BindPPStructureV2Table(ocr_module); diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc b/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc old mode 100644 new mode 100755 index af5597309..91826e48b --- a/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc +++ b/fastdeploy/vision/ocr/ppocr/ppocr_pybind.cc @@ -16,6 +16,38 @@ #include "fastdeploy/pybind/main.h" namespace fastdeploy { +void BindPPOCRv4(pybind11::module& m) { + // PPOCRv4 + pybind11::class_(m, "PPOCRv4") + + .def(pybind11::init()) + .def(pybind11::init()) + .def_property("cls_batch_size", &pipeline::PPOCRv4::GetClsBatchSize, + &pipeline::PPOCRv4::SetClsBatchSize) + .def_property("rec_batch_size", &pipeline::PPOCRv4::GetRecBatchSize, + &pipeline::PPOCRv4::SetRecBatchSize) + .def("clone", [](pipeline::PPOCRv4& self) { return self.Clone(); }) + .def("predict", + [](pipeline::PPOCRv4& self, pybind11::array& data) { + auto mat = PyArrayToCvMat(data); + vision::OCRResult res; + self.Predict(&mat, &res); + return res; + }) + .def("batch_predict", + [](pipeline::PPOCRv4& self, std::vector& data) { + std::vector images; + for (size_t i = 0; i < data.size(); ++i) { + images.push_back(PyArrayToCvMat(data[i])); + } + std::vector results; + self.BatchPredict(images, &results); + return results; + }); +} void BindPPOCRv3(pybind11::module& m) { // PPOCRv3 pybind11::class_(m, "PPOCRv3") diff --git a/fastdeploy/vision/ocr/ppocr/ppocr_v4.h b/fastdeploy/vision/ocr/ppocr/ppocr_v4.h new file mode 100755 index 000000000..17da6ec27 --- /dev/null +++ b/fastdeploy/vision/ocr/ppocr/ppocr_v4.h @@ -0,0 +1,80 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h" + +namespace fastdeploy { +/** \brief This pipeline can launch detection model, classification model and recognition model sequentially. All OCR pipeline APIs are defined inside this namespace. + * + */ +namespace pipeline { +/*! @brief PPOCRv4 is used to load PP-OCRv4 series models provided by PaddleOCR. + */ +class FASTDEPLOY_DECL PPOCRv4 : public PPOCRv3 { + public: + /** \brief Set up the detection model path, classification model path and recognition model path respectively. + * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer + * \param[in] cls_model Path of classification model, e.g ./ch_ppocr_mobile_v2.0_cls_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv4_rec_infer + */ + PPOCRv4(fastdeploy::vision::ocr::DBDetector* det_model, + fastdeploy::vision::ocr::Classifier* cls_model, + fastdeploy::vision::ocr::Recognizer* rec_model) + : PPOCRv3(det_model, cls_model, rec_model) { + // The only difference between v2 and v4 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + /** \brief Classification model is optional, so this function is set up the detection model path and recognition model path respectively. 
+ * + * \param[in] det_model Path of detection model, e.g ./ch_PP-OCRv4_det_infer + * \param[in] rec_model Path of recognition model, e.g ./ch_PP-OCRv4_rec_infer + */ + PPOCRv4(fastdeploy::vision::ocr::DBDetector* det_model, + fastdeploy::vision::ocr::Recognizer* rec_model) + : PPOCRv3(det_model, rec_model) { + // The only difference between v2 and v4 + auto preprocess_shape = recognizer_->GetPreprocessor().GetRecImageShape(); + preprocess_shape[1] = 48; + recognizer_->GetPreprocessor().SetRecImageShape(preprocess_shape); + } + + /** \brief Clone a new PPOCRv4 with less memory usage when multiple instances of the same model are created + * + * \return new PPOCRv4* type unique pointer + */ + std::unique_ptr Clone() const { + std::unique_ptr clone_model = utils::make_unique(PPOCRv4(*this)); + clone_model->detector_ = detector_->Clone().release(); + if (classifier_ != nullptr) { + clone_model->classifier_ = classifier_->Clone().release(); + } + clone_model->recognizer_ = recognizer_->Clone().release(); + return clone_model; + } +}; + +} // namespace pipeline + +namespace application { +namespace ocrsystem { + typedef pipeline::PPOCRv4 PPOCRSystemv4; +} // namespace ocrsystem +} // namespace application + +} // namespace fastdeploy diff --git a/fastdeploy/vision/visualize/visualize.h b/fastdeploy/vision/visualize/visualize.h index f5ceb8558..fa45c69bd 100755 --- a/fastdeploy/vision/visualize/visualize.h +++ b/fastdeploy/vision/visualize/visualize.h @@ -92,6 +92,7 @@ FASTDEPLOY_DECL cv::Mat VisDetection(const cv::Mat& im, int line_size = 1, float font_size = 0.5f, std::vector font_color = {255, 255, 255}, int font_thickness = 1); + /** \brief Show the visualized results with custom labels for detection models * * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format diff --git a/python/fastdeploy/vision/ocr/ppocr/__init__.py b/python/fastdeploy/vision/ocr/ppocr/__init__.py index 3cd1c62be..4f04b3210 100755 --- 
a/python/fastdeploy/vision/ocr/ppocr/__init__.py +++ b/python/fastdeploy/vision/ocr/ppocr/__init__.py @@ -848,6 +848,79 @@ class StructureV2Layout(FastDeployModel): def postprocessor(self, value): self._model.postprocessor = value +class PPOCRv4(FastDeployModel): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + """Construct a pipeline with text detector, direction classifier and text recognizer models + + :param det_model: (FastDeployModel) The detection model object created by fastdeploy.vision.ocr.DBDetector. + :param cls_model: (FastDeployModel) The classification model object created by fastdeploy.vision.ocr.Classifier. + :param rec_model: (FastDeployModel) The recognition model object created by fastdeploy.vision.ocr.Recognizer. + """ + assert det_model is not None and rec_model is not None, "The det_model and rec_model cannot be None." + if cls_model is None: + self.system_ = C.vision.ocr.PPOCRv4(det_model._model, + rec_model._model) + else: + self.system_ = C.vision.ocr.PPOCRv4( + det_model._model, cls_model._model, rec_model._model) + + def clone(self): + """Clone PPOCRv4 pipeline object + :return: a new PPOCRv4 pipeline object + """ + + class PPOCRv4Clone(PPOCRv4): + def __init__(self, system): + self.system_ = system + + clone_model = PPOCRv4Clone(self.system_.clone()) + return clone_model + + def predict(self, input_image): + """Predict an input image + :param input_image: (numpy.ndarray) The input image data, 3-D array with layout HWC, BGR format + :return: OCRResult + """ + return self.system_.predict(input_image) + + def batch_predict(self, images): + """Predict a batch of input images + :param images: (list of numpy.ndarray) The input image list, each element is a 3-D array with layout HWC, BGR format + :return: OCRBatchResult + """ + return self.system_.batch_predict(images) + + @property + def cls_batch_size(self): + return self.system_.cls_batch_size + + @cls_batch_size.setter + def cls_batch_size(self, value): + assert 
isinstance( + value, + int), "The value to set `cls_batch_size` must be type of int." + self.system_.cls_batch_size = value + + @property + def rec_batch_size(self): + return self.system_.rec_batch_size + + @rec_batch_size.setter + def rec_batch_size(self, value): + assert isinstance( + value, + int), "The value to set `rec_batch_size` must be type of int." + self.system_.rec_batch_size = value + +class PPOCRSystemv4(PPOCRv4): + def __init__(self, det_model=None, cls_model=None, rec_model=None): + logging.warning( + "DEPRECATED: fd.vision.ocr.PPOCRSystemv4 is deprecated, " + "please use fd.vision.ocr.PPOCRv4 instead.") + super(PPOCRSystemv4, self).__init__(det_model, cls_model, rec_model) + + def predict(self, input_image): + return super(PPOCRSystemv4, self).predict(input_image) class PPOCRv3(FastDeployModel): def __init__(self, det_model=None, cls_model=None, rec_model=None): diff --git a/python/fastdeploy/vision/visualize/__init__.py b/python/fastdeploy/vision/visualize/__init__.py index df74091a2..07ae8d638 100755 --- a/python/fastdeploy/vision/visualize/__init__.py +++ b/python/fastdeploy/vision/visualize/__init__.py @@ -63,6 +63,8 @@ def vis_perception(im_data, score_threshold, line_size, font_size) + + def vis_keypoint_detection(im_data, keypoint_det_result, conf_threshold=0.5): """Show the visualized results for keypoint detection models