Fix Diff & Improve OCR readme (#184)

* Add PaddleOCR Support

* Add PaddleOCR Support

* Add PaddleOCRv3 Support

* Add PaddleOCRv3 Support

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Add PaddleOCRv3 Support

* Add PaddleOCRv3 Support

* Add PaddleOCRv3 Support

* Fix Rec diff

* Remove useless functions

* Remove useless comments

* Add PaddleOCRv2 Support

* Add PaddleOCRv3 & PaddleOCRv2 Support

* remove useless parameters

* Add utils of sorting det boxes

* Fix code naming convention

* Fix code naming convention

* Fix code naming convention

* Fix bug in the Classify process

* Improve OCR Readme

* Fix diff in Cls model

* Update Model Download Link in Readme
This commit is contained in:
yunyaoXYY
2022-09-05 09:36:06 +08:00
committed by GitHub
parent 5f880dcdd0
commit fad82b4e5d
9 changed files with 46 additions and 53 deletions

View File

@@ -46,7 +46,7 @@ bool Classifier::Initialize() {
cls_thresh = 0.9;
cls_image_shape = {3, 48, 192};
cls_batch_num = 1;
mean = {0.485f, 0.456f, 0.406f};
mean = {0.5f, 0.5f, 0.5f};
scale = {0.5f, 0.5f, 0.5f};
is_scale = true;

View File

@@ -144,10 +144,11 @@ bool DBDetector::Postprocess(
std::vector<std::vector<std::vector<int>>> boxes;
post_processor_.BoxesFromBitmap(pred_map, &boxes, bit_map, det_db_box_thresh,
det_db_unclip_ratio, det_db_score_mode);
boxes =
post_processor_.BoxesFromBitmap(pred_map, bit_map, det_db_box_thresh,
det_db_unclip_ratio, det_db_score_mode);
post_processor_.FilterTagDetRes(&boxes, ratio_h, ratio_w, im_info);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, im_info);
// boxes to boxes_result
for (int i = 0; i < boxes.size(); i++) {

View File

@@ -36,7 +36,7 @@ void PPOCRSystemv2::Detect(cv::Mat* img,
void PPOCRSystemv2::Recognize(cv::Mat* img,
fastdeploy::vision::OCRResult* result) {
std::tuple<std::string, float> rec_result;
this->recognizer->rec_image_shape[1] = 32;
this->recognizer->Predict(img, &rec_result);
result->text.push_back(std::get<0>(rec_result));

View File

@@ -94,7 +94,6 @@ bool PPOCRSystemv3::Predict(cv::Mat* img,
for (int i = 0; i < img_list.size(); i++) {
if ((result->cls_labels)[i] % 2 == 1 &&
(result->cls_scores)[i] > this->classifier->cls_thresh) {
std::cout << "Rotate this image " << std::endl;
cv::rotate(img_list[i], img_list[i], 1);
}
}

View File

@@ -250,9 +250,8 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
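// This appears to be the DB (differentiable binarization) step: it extracts detection boxes from the bitmap.
// It uses box_thresh (boxes scoring below this threshold are not kept) and det_db_unclip_ratio (the text-box expansion coefficient, which affects the size of the text box).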
void PostProcessor::BoxesFromBitmap(
const cv::Mat pred, std::vector<std::vector<std::vector<int>>> *boxes,
const cv::Mat bitmap, const float &box_thresh,
std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const std::string &det_db_score_mode) {
const int min_size = 3;
const int max_candidates = 1000;
@@ -269,6 +268,8 @@ void PostProcessor::BoxesFromBitmap(
int num_contours =
contours.size() >= max_candidates ? max_candidates : contours.size();
std::vector<std::vector<std::vector<int>>> boxes;
for (int _i = 0; _i < num_contours; _i++) {
if (contours[_i].size() <= 2) {
continue;
@@ -318,45 +319,42 @@ void PostProcessor::BoxesFromBitmap(
0, float(dest_height)))};
intcliparray.push_back(a);
}
boxes->push_back(intcliparray);
boxes.push_back(intcliparray);
} // end for
// return true;
return boxes;
}
// Obtains the target box positions from the detection results.
void PostProcessor::FilterTagDetRes(
std::vector<std::vector<std::vector<int>>> *boxes, const float ratio_h,
const float ratio_w,
const std::map<std::string, std::array<float, 2>> &im_info) {
std::vector<std::vector<std::vector<int>>> PostProcessor::FilterTagDetRes(
std::vector<std::vector<std::vector<int>>> boxes, float ratio_h,
float ratio_w, const std::map<std::string, std::array<float, 2>> &im_info) {
int oriimg_h = im_info.at("input_shape")[0];
int oriimg_w = im_info.at("input_shape")[1];
for (int n = 0; n < boxes->size(); n++) {
(*boxes)[n] = OrderPointsClockwise((*boxes)[n]);
for (int m = 0; m < (*boxes)[0].size(); m++) {
(*boxes)[n][m][0] /= ratio_w;
(*boxes)[n][m][1] /= ratio_h;
std::vector<std::vector<std::vector<int>>> root_points;
for (int n = 0; n < boxes.size(); n++) {
boxes[n] = OrderPointsClockwise(boxes[n]);
for (int m = 0; m < boxes[0].size(); m++) {
boxes[n][m][0] /= ratio_w;
boxes[n][m][1] /= ratio_h;
(*boxes)[n][m][0] = int(_min(_max((*boxes)[n][m][0], 0), oriimg_w - 1));
(*boxes)[n][m][1] = int(_min(_max((*boxes)[n][m][1], 0), oriimg_h - 1));
boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
}
}
// All points have been collected at this stage; the filtering below is applied next.
for (int n = (*boxes).size() - 1; n >= 0; n--) {
for (int n = 0; n < boxes.size(); n++) {
int rect_width, rect_height;
rect_width = int(sqrt(pow((*boxes)[n][0][0] - (*boxes)[n][1][0], 2) +
pow((*boxes)[n][0][1] - (*boxes)[n][1][1], 2)));
rect_height = int(sqrt(pow((*boxes)[n][0][0] - (*boxes)[n][3][0], 2) +
pow((*boxes)[n][0][1] - (*boxes)[n][3][1], 2)));
// Remove boxes whose width or height is 4 or less; erase is paired with reverse-order traversal.
if (rect_width <= 4 || rect_height <= 4) {
boxes->erase(boxes->begin() + n);
}
rect_width = int(sqrt(pow(boxes[n][0][0] - boxes[n][1][0], 2) +
pow(boxes[n][0][1] - boxes[n][1][1], 2)));
rect_height = int(sqrt(pow(boxes[n][0][0] - boxes[n][3][0], 2) +
pow(boxes[n][0][1] - boxes[n][3][1], 2)));
if (rect_width <= 4 || rect_height <= 4) continue;
root_points.push_back(boxes[n]);
}
return root_points;
}
} // namespace ocr
} // namespace vision
} // namespace fastdeploy

View File

@@ -52,15 +52,13 @@ class PostProcessor {
float BoxScoreFast(std::vector<std::vector<float>> box_array, cv::Mat pred);
float PolygonScoreAcc(std::vector<cv::Point> contour, cv::Mat pred);
void BoxesFromBitmap(const cv::Mat pred,
std::vector<std::vector<std::vector<int>>> *boxes,
const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio,
const std::string &det_db_score_mode);
std::vector<std::vector<std::vector<int>>> BoxesFromBitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const std::string &det_db_score_mode);
void FilterTagDetRes(
std::vector<std::vector<std::vector<int>>> *boxes, const float ratio_h,
const float ratio_w,
std::vector<std::vector<std::vector<int>>> FilterTagDetRes(
std::vector<std::vector<std::vector<int>>> boxes, float ratio_h,
float ratio_w,
const std::map<std::string, std::array<float, 2>> &im_info);
private: