[Doc] Rename PPOCRSystem to PPOCR and update comments. (#395)

* Improve OCR Readme

* Improve OCR Readme

* Improve OCR Readme

* Improve OCR Readme

* Improve OCR Readme

* Add Initialize function to PP-OCR

* Add Initialize function to PP-OCR

* Add Initialize function to PP-OCR

* Make all the model links come from PaddleOCR

* Improve OCR readme

* Improve OCR readme

* Improve OCR readme

* Improve OCR readme

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add Readme for vision results

* Add check for label file in postprocess of Rec model

* Add check for label file in postprocess of Rec model

* Add check for label file in postprocess of Rec model

* Add check for label file in postprocess of Rec model

* Add check for label file in postprocess of Rec model

* Add check for label file in postprocess of Rec model

* Add comments to create API docs

* Improve OCR comments

* Rename OCR and add comments

* Make sure previous python example works

* Make sure previous python example works

Co-authored-by: Jason <jiangjiajun@baidu.com>
This commit is contained in:
yunyaoXYY
2022-10-19 17:21:48 +08:00
committed by GitHub
parent 85e1c647f6
commit 24317e1a14
23 changed files with 380 additions and 186 deletions

View File

@@ -42,8 +42,8 @@
#include "fastdeploy/vision/matting/ppmatting/ppmatting.h"
#include "fastdeploy/vision/ocr/ppocr/classifier.h"
#include "fastdeploy/vision/ocr/ppocr/dbdetector.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_system_v2.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_system_v3.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v3.h"
#include "fastdeploy/vision/ocr/ppocr/recognizer.h"
#include "fastdeploy/vision/segmentation/ppseg/model.h"
#endif

View File

@@ -17,13 +17,13 @@
namespace fastdeploy {
void BindPPOCRModel(pybind11::module& m);
void BindPPOCRSystemv3(pybind11::module& m);
void BindPPOCRSystemv2(pybind11::module& m);
void BindPPOCRv3(pybind11::module& m);
void BindPPOCRv2(pybind11::module& m);
// Creates the `ocr` submodule on `m` and registers every OCR binding on it:
// the PPOCR model bindings first, then the v3 and v2 pipeline bindings.
void BindOcr(pybind11::module& m) {
auto ocr_module = m.def_submodule("ocr", "Module to deploy OCR models");
BindPPOCRModel(ocr_module);
BindPPOCRSystemv3(ocr_module);
BindPPOCRSystemv2(ocr_module);
BindPPOCRv3(ocr_module);
BindPPOCRv2(ocr_module);
}
} // namespace fastdeploy

View File

@@ -20,20 +20,36 @@
namespace fastdeploy {
namespace vision {
/** \brief All OCR series model APIs are defined inside this namespace
*
*/
namespace ocr {
/*! @brief Classifier object is used to load the classification model provided by PaddleOCR.
*/
class FASTDEPLOY_DECL Classifier : public FastDeployModel {
public:
Classifier();
/** \brief Set path of model file, and the configuration of runtime
*
* \param[in] model_file Path of model file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdmodel.
* \param[in] params_file Path of parameter file, e.g ./ch_ppocr_mobile_v2.0_cls_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
* \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
* \param[in] model_format Model format of the loaded model, default is Paddle format.
*/
Classifier(const std::string& model_file, const std::string& params_file = "",
const RuntimeOption& custom_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::PADDLE);
/// Get model's name
std::string ModelName() const { return "ppocr/ocr_cls"; }
/** \brief Predict the input image and get OCR classification model result.
*
* \param[in] img The input image data, comes from cv::imread().
* \param[out] result The OCR classification result will be written to this structure.
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool Predict(cv::Mat* img, std::tuple<int, float>* result);
// pre & post parameters
// Pre & Post parameters
float cls_thresh;
std::vector<int> cls_image_shape;
int cls_batch_num;
@@ -44,9 +60,9 @@ class FASTDEPLOY_DECL Classifier : public FastDeployModel {
private:
bool Initialize();
/// Preprocess the input data, and set the preprocessed results to `output`
bool Preprocess(Mat* img, FDTensor* output);
/// Postprocess the inferenced results, and set the final result to `result`
bool Postprocess(FDTensor& infer_result, std::tuple<int, float>* result);
};

View File

@@ -20,22 +20,38 @@
namespace fastdeploy {
namespace vision {
/** \brief All OCR series model APIs are defined inside this namespace
*
*/
namespace ocr {
/*! @brief DBDetector object is used to load the detection model provided by PaddleOCR.
*/
class FASTDEPLOY_DECL DBDetector : public FastDeployModel {
public:
DBDetector();
/** \brief Set path of model file, and the configuration of runtime
*
* \param[in] model_file Path of model file, e.g ./ch_PP-OCRv3_det_infer/model.pdmodel.
* \param[in] params_file Path of parameter file, e.g ./ch_PP-OCRv3_det_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
* \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
* \param[in] model_format Model format of the loaded model, default is Paddle format.
*/
DBDetector(const std::string& model_file, const std::string& params_file = "",
const RuntimeOption& custom_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::PADDLE);
/// Get model's name
std::string ModelName() const { return "ppocr/ocr_det"; }
/** \brief Predict the input image and get OCR detection model result.
*
* \param[in] im The input image data, comes from cv::imread().
* \param[out] boxes_result The OCR detection result will be written to this structure.
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool Predict(cv::Mat* im,
std::vector<std::array<int, 8>>* boxes_result);
// pre&post process parameters
// Pre & Post process parameters
int max_side_len;
float ratio_h{};
@@ -53,14 +69,14 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel {
private:
bool Initialize();
/// Preprocess the input data, and set the preprocessed results to `outputs`
bool Preprocess(Mat* mat, FDTensor* outputs,
std::map<std::string, std::array<float, 2>>* im_info);
/*! @brief Postprocess the inferenced results, and set the final result to `boxes_result`
*/
bool Postprocess(FDTensor& infer_result,
std::vector<std::array<int, 8>>* boxes_result,
const std::map<std::string, std::array<float, 2>>& im_info);
PostProcessor post_processor_;
};

View File

@@ -15,17 +15,17 @@
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindPPOCRSystemv3(pybind11::module& m) {
// OCRSys
pybind11::class_<application::ocrsystem::PPOCRSystemv3, FastDeployModel>(
m, "PPOCRSystemv3")
void BindPPOCRv3(pybind11::module& m) {
// PPOCRv3
pybind11::class_<pipeline::PPOCRv3, FastDeployModel>(
m, "PPOCRv3")
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Classifier*,
fastdeploy::vision::ocr::Recognizer*>())
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*>())
.def("predict", [](application::ocrsystem::PPOCRSystemv3& self,
.def("predict", [](pipeline::PPOCRv3& self,
pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;
@@ -34,16 +34,16 @@ void BindPPOCRSystemv3(pybind11::module& m) {
});
}
void BindPPOCRSystemv2(pybind11::module& m) {
// OCRSys
pybind11::class_<application::ocrsystem::PPOCRSystemv2, FastDeployModel>(
m, "PPOCRSystemv2")
void BindPPOCRv2(pybind11::module& m) {
// PPOCRv2
pybind11::class_<pipeline::PPOCRv2, FastDeployModel>(
m, "PPOCRv2")
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Classifier*,
fastdeploy::vision::ocr::Recognizer*>())
.def(pybind11::init<fastdeploy::vision::ocr::DBDetector*,
fastdeploy::vision::ocr::Recognizer*>())
.def("predict", [](application::ocrsystem::PPOCRSystemv2& self,
.def("predict", [](pipeline::PPOCRv2& self,
pybind11::array& data) {
auto mat = PyArrayToCvMat(data);
vision::OCRResult res;

View File

@@ -1,40 +0,0 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/ocr/ppocr/ppocr_system_v2.h"
namespace fastdeploy {
namespace application {
namespace ocrsystem {
class FASTDEPLOY_DECL PPOCRSystemv3 : public PPOCRSystemv2 {
 public:
  /// Builds the v3 system from a detection, a classification and a
  /// recognition model; the models themselves are handed to PPOCRSystemv2.
  PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* det_model,
                fastdeploy::vision::ocr::Classifier* cls_model,
                fastdeploy::vision::ocr::Recognizer* rec_model)
      : PPOCRSystemv2(det_model, cls_model, rec_model) {
    // v3 differs from v2 only in this entry of the recognizer's image shape.
    auto& rec_shape = recognizer_->rec_image_shape;
    rec_shape[1] = 48;
  }

  /// Same as above, for a system without a classification model.
  PPOCRSystemv3(fastdeploy::vision::ocr::DBDetector* det_model,
                fastdeploy::vision::ocr::Recognizer* rec_model)
      : PPOCRSystemv2(det_model, rec_model) {
    auto& rec_shape = recognizer_->rec_image_shape;
    rec_shape[1] = 48;
  }
};
} // namespace ocrsystem
} // namespace application
} // namespace fastdeploy

View File

@@ -12,27 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision/ocr/ppocr/ppocr_system_v2.h"
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
#include "fastdeploy/utils/perf.h"
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_utils.h"
namespace fastdeploy {
namespace application {
namespace ocrsystem {
PPOCRSystemv2::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
namespace pipeline {
PPOCRv2::PPOCRv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model)
: detector_(det_model), classifier_(cls_model), recognizer_(rec_model) {
recognizer_->rec_image_shape[1] = 32;
}
PPOCRSystemv2::PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
PPOCRv2::PPOCRv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model)
: detector_(det_model), recognizer_(rec_model) {
recognizer_->rec_image_shape[1] = 32;
}
bool PPOCRSystemv2::Initialized() const {
bool PPOCRv2::Initialized() const {
if (detector_ != nullptr && !detector_->Initialized()){
return false;
@@ -48,21 +47,21 @@ bool PPOCRSystemv2::Initialized() const {
return true;
}
bool PPOCRSystemv2::Detect(cv::Mat* img,
bool PPOCRv2::Detect(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
if (!detector_->Predict(img, &(result->boxes))) {
FDERROR << "There's error while detecting image in PPOCRSystem." << std::endl;
FDERROR << "There's error while detecting image in PPOCR." << std::endl;
return false;
}
vision::ocr::SortBoxes(result);
return true;
}
bool PPOCRSystemv2::Recognize(cv::Mat* img,
bool PPOCRv2::Recognize(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<std::string, float> rec_result;
if (!recognizer_->Predict(img, &rec_result)) {
FDERROR << "There's error while recognizing image in PPOCRSystem." << std::endl;
FDERROR << "There's error while recognizing image in PPOCR." << std::endl;
return false;
}
@@ -71,12 +70,12 @@ bool PPOCRSystemv2::Recognize(cv::Mat* img,
return true;
}
bool PPOCRSystemv2::Classify(cv::Mat* img,
bool PPOCRv2::Classify(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<int, float> cls_result;
if (!classifier_->Predict(img, &cls_result)) {
FDERROR << "There's error while classifying image in PPOCRSystem." << std::endl;
FDERROR << "There's error while classifying image in PPOCR." << std::endl;
return false;
}
@@ -85,7 +84,7 @@ bool PPOCRSystemv2::Classify(cv::Mat* img,
return true;
}
bool PPOCRSystemv2::Predict(cv::Mat* img,
bool PPOCRv2::Predict(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
result->Clear();
if (nullptr != detector_ && !Detect(img, result)) {
@@ -120,6 +119,5 @@ bool PPOCRSystemv2::Predict(cv::Mat* img,
return true;
};
} // namespace ocrsystem
} // namespace application
} // namespace pipeline
} // namespace fastdeploy

View File

@@ -26,18 +26,38 @@
#include "fastdeploy/vision/ocr/ppocr/utils/ocr_postprocess_op.h"
namespace fastdeploy {
namespace application {
namespace ocrsystem {
class FASTDEPLOY_DECL PPOCRSystemv2 : public FastDeployModel {
/** \brief This pipeline can launch detection model, classification model and recognition model sequentially. All OCR pipeline APIs are defined inside this namespace.
*
*/
namespace pipeline {
/*! @brief PPOCRv2 is used to load PP-OCRv2 series models provided by PaddleOCR.
*/
class FASTDEPLOY_DECL PPOCRv2 : public FastDeployModel {
public:
PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
/** \brief Build the pipeline from a detection model, a classification model and a recognition model.
*
* \param[in] det_model Pointer to the detection model, e.g. one loaded from ./ch_PP-OCRv2_det_infer
* \param[in] cls_model Pointer to the classification model, e.g. one loaded from ./ch_ppocr_mobile_v2.0_cls_infer
* \param[in] rec_model Pointer to the recognition model, e.g. one loaded from ./ch_PP-OCRv2_rec_infer
*/
PPOCRv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Classifier* cls_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
PPOCRSystemv2(fastdeploy::vision::ocr::DBDetector* det_model,
/** \brief The classification model is optional, so this overload builds the pipeline from a detection model and a recognition model only.
*
* \param[in] det_model Pointer to the detection model, e.g. one loaded from ./ch_PP-OCRv2_det_infer
* \param[in] rec_model Pointer to the recognition model, e.g. one loaded from ./ch_PP-OCRv2_rec_infer
*/
PPOCRv2(fastdeploy::vision::ocr::DBDetector* det_model,
fastdeploy::vision::ocr::Recognizer* rec_model);
/** \brief Predict the input image and get OCR result.
*
* \param[in] img The input image data, comes from cv::imread().
* \param[out] result The output OCR result will be written to this structure.
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
bool Initialized() const override;
@@ -45,12 +65,19 @@ class FASTDEPLOY_DECL PPOCRSystemv2 : public FastDeployModel {
fastdeploy::vision::ocr::DBDetector* detector_ = nullptr;
fastdeploy::vision::ocr::Classifier* classifier_ = nullptr;
fastdeploy::vision::ocr::Recognizer* recognizer_ = nullptr;
/// Launch the detection process in OCR.
virtual bool Detect(cv::Mat* img, fastdeploy::vision::OCRResult* result);
/// Launch the recognition process in OCR.
virtual bool Recognize(cv::Mat* img, fastdeploy::vision::OCRResult* result);
/// Launch the classification process in OCR.
virtual bool Classify(cv::Mat* img, fastdeploy::vision::OCRResult* result);
};
} // namespace pipeline
// Backward-compatible alias for the pre-rename class name. It has to live in
// fastdeploy::application::ocrsystem (not inside fastdeploy::pipeline) so that
// code written against the old PPOCRSystemv2 name keeps compiling, and so that
// it sits next to the PPOCRSystemv3 alias declared in ppocr_v3.h.
namespace application {
namespace ocrsystem {
using PPOCRSystemv2 = pipeline::PPOCRv2;
} // namespace ocrsystem
} // namespace application
} // namespace fastdeploy

View File

@@ -0,0 +1,62 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/vision/ocr/ppocr/ppocr_v2.h"
namespace fastdeploy {
/** \brief This pipeline can launch detection model, classification model and recognition model sequentially. All OCR pipeline APIs are defined inside this namespace.
*
*/
namespace pipeline {
/*! @brief PPOCRv3 chains the PP-OCRv3 series models provided by PaddleOCR into one pipeline.
 */
class FASTDEPLOY_DECL PPOCRv3 : public PPOCRv2 {
 public:
  /** \brief Build the pipeline from a detection model, a classification model and a recognition model.
   *
   * \param[in] det_model Pointer to the detection model, e.g. one loaded from ./ch_PP-OCRv3_det_infer
   * \param[in] cls_model Pointer to the classification model, e.g. one loaded from ./ch_ppocr_mobile_v2.0_cls_infer
   * \param[in] rec_model Pointer to the recognition model, e.g. one loaded from ./ch_PP-OCRv3_rec_infer
   */
  PPOCRv3(fastdeploy::vision::ocr::DBDetector* det_model,
          fastdeploy::vision::ocr::Classifier* cls_model,
          fastdeploy::vision::ocr::Recognizer* rec_model)
      : PPOCRv2(det_model, cls_model, rec_model) {
    // The PPOCRv2 constructor writes 32 into this slot; 48 is the only
    // setting in which v3 differs from v2.
    auto& rec_shape = recognizer_->rec_image_shape;
    rec_shape[1] = 48;
  }

  /** \brief The classification model is optional, so this overload builds the pipeline from a detection model and a recognition model only.
   *
   * \param[in] det_model Pointer to the detection model, e.g. one loaded from ./ch_PP-OCRv3_det_infer
   * \param[in] rec_model Pointer to the recognition model, e.g. one loaded from ./ch_PP-OCRv3_rec_infer
   */
  PPOCRv3(fastdeploy::vision::ocr::DBDetector* det_model,
          fastdeploy::vision::ocr::Recognizer* rec_model)
      : PPOCRv2(det_model, rec_model) {
    // Same v3-specific override as in the three-model constructor.
    auto& rec_shape = recognizer_->rec_image_shape;
    rec_shape[1] = 48;
  }
};
} // namespace pipeline
// Alias that keeps the pre-rename name application::ocrsystem::PPOCRSystemv3
// usable; it refers to pipeline::PPOCRv3.
namespace application {
namespace ocrsystem {
using PPOCRSystemv3 = pipeline::PPOCRv3;
} // namespace ocrsystem
} // namespace application
} // namespace fastdeploy

View File

@@ -56,9 +56,6 @@ Recognizer::Recognizer(const std::string& model_file,
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
runtime_option.params_file = params_file;
runtime_option.DeletePaddleBackendPass("matmul_transpose_reshape_fuse_pass");
runtime_option.DeletePaddleBackendPass(
"matmul_transpose_reshape_mkldnn_fuse_pass");
initialized = Initialize();

View File

@@ -20,22 +20,39 @@
namespace fastdeploy {
namespace vision {
/** \brief All OCR series model APIs are defined inside this namespace
*
*/
namespace ocr {
/*! @brief Recognizer object is used to load the recognition model provided by PaddleOCR.
*/
class FASTDEPLOY_DECL Recognizer : public FastDeployModel {
public:
Recognizer();
/** \brief Set path of model file, and the configuration of runtime
*
* \param[in] model_file Path of model file, e.g ./ch_PP-OCRv3_rec_infer/model.pdmodel.
* \param[in] params_file Path of parameter file, e.g ./ch_PP-OCRv3_rec_infer/model.pdiparams, if the model format is ONNX, this parameter will be ignored.
* \param[in] label_path Path of label file used by OCR recognition model. e.g ./ppocr_keys_v1.txt
* \param[in] custom_option RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends`.
* \param[in] model_format Model format of the loaded model, default is Paddle format.
*/
Recognizer(const std::string& model_file, const std::string& params_file = "",
const std::string& label_path = "",
const RuntimeOption& custom_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::PADDLE);
/// Get model's name
std::string ModelName() const { return "ppocr/ocr_rec"; }
/** \brief Predict the input image and get OCR recognition model result.
*
* \param[in] img The input image data, comes from cv::imread().
* \param[out] rec_result The OCR recognition result will be written to this structure.
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool Predict(cv::Mat* img,
std::tuple<std::string, float>* rec_result);
// pre & post parameters
// Pre & Post parameters
std::vector<std::string> label_list;
int rec_batch_num;
int rec_img_h;
@@ -48,10 +65,11 @@ class FASTDEPLOY_DECL Recognizer : public FastDeployModel {
private:
bool Initialize();
/// Preprocess the input data, and set the preprocessed results to `outputs`
bool Preprocess(Mat* img, FDTensor* outputs,
const std::vector<int>& rec_image_shape);
/*! @brief Postprocess the inferenced results, and set the final result to `rec_result`
*/
bool Postprocess(FDTensor& infer_result,
std::tuple<std::string, float>* rec_result);
};