From 39ecafaa90200cf62553b4ee68259b660193ca1a Mon Sep 17 00:00:00 2001 From: Matt Hill Date: Mon, 2 Jun 2014 20:51:04 -0400 Subject: [PATCH] Updated OCR training utility to work better with Tesseract --- src/misc_utilities/prepcharsfortraining.cpp | 70 ++++++++++++++++----- 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/src/misc_utilities/prepcharsfortraining.cpp b/src/misc_utilities/prepcharsfortraining.cpp index 6fbfde7..6ab47bd 100644 --- a/src/misc_utilities/prepcharsfortraining.cpp +++ b/src/misc_utilities/prepcharsfortraining.cpp @@ -57,25 +57,45 @@ int main( int argc, const char** argv ) if (DirectoryExists(inDir.c_str())) { - const int X_OFFSET = 10; - const int Y_OFFSET = 10; + const int CHAR_PADDING_HORIZONTAL = 0; + const int CHAR_PADDING_VERTICAL = 0; + + const int X_OFFSET = 5; + const int Y_OFFSET = 5; const int PAGE_MARGIN_X = 70; const int PAGE_MARGIN_Y = 70; const int HORIZONTAL_RESOLUTION = 3500; + const int MAX_VERTICAL_RESOLUTION = 6000; // Maximum vertical size before chopping into additional pages. - const int TILE_WIDTH = 55; + const int TILE_WIDTH = 25; + const int TILE_HEIGHT = 60; const int CHAR_HORIZ_OFFSET = 40; - const int TILE_HEIGHT = 70; const int CHAR_VERT_OFFSET = 48; + const int FIXED_CHAR_HEIGHT = 40; // RESIZE all characters to this height + vector files = getFilesInDir(inDir.c_str()); sort( files.begin(), files.end(), stringCompare ); - + + for (int i = 0; i< files.size(); i++) + { + if (hasEnding(files[i], ".png") || hasEnding(files[i], ".jpg")) + { + + } + else + { + std::cerr << "Non-image file detected in this directory. This must be removed first" << std::endl; + return 1; + } + } + + int tiles_per_row = ((float) (HORIZONTAL_RESOLUTION - (PAGE_MARGIN_X * 2))) / ((float) TILE_WIDTH); int lines = files.size() / (tiles_per_row); - int vertical_resolution = (lines * TILE_HEIGHT) + (PAGE_MARGIN_Y * 2) ; + int vertical_resolution = (lines * TILE_HEIGHT) + (PAGE_MARGIN_Y * 3) ; cout << tiles_per_row << " : " << vertical_resolution << endl; Mat bigTif = Mat::zeros(Size(HORIZONTAL_RESOLUTION, vertical_resolution), CV_8U); @@ -100,6 +120,8 @@ int main( int argc, const char** argv ) char charcode = files[i][0]; Mat characterImg = imread(fullpath); + + Mat charImgCopy = Mat::zeros(Size(150, 150), characterImg.type()); bitwise_not(charImgCopy, charImgCopy); @@ -112,14 +134,34 @@ int main( int argc, const char** argv ) //imshow("copy", charImgCopy); findContours(charImgCopy, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE); - Rect tallestRect(0, 0, 0, 0); + float minHeightPercent = 0.35; + int minHeight = (int) (((float) characterImg.rows) * minHeightPercent); + + vector tallEnoughRects; for (int c = 0; c < contours.size(); c++) { Rect tmpRect = boundingRect(contours[c]); - if (tmpRect.height > tallestRect.height) - tallestRect = tmpRect; + if (tmpRect.height > minHeight) + tallEnoughRects.push_back( tmpRect ); } + int xMin = 9999999, xMax = 0, yMin = 9999999, yMax = 0; + // Combine all the "tall enough" rectangles into one super rectangle + for (int r = 0; r < tallEnoughRects.size(); r++) + { + if (tallEnoughRects[r].x < xMin) + xMin = tallEnoughRects[r].x; + if (tallEnoughRects[r].y < yMin) + yMin = tallEnoughRects[r].y; + if (tallEnoughRects[r].x + tallEnoughRects[r].width > xMax) + xMax = tallEnoughRects[r].x + tallEnoughRects[r].width; + if (tallEnoughRects[r].y + tallEnoughRects[r].height > yMax) + yMax = tallEnoughRects[r].y + tallEnoughRects[r].height; + } + + Rect tallestRect(xMin, yMin, xMax - xMin, yMax - yMin); + + //cout << tallestRect.x << ":" << tallestRect.y << " -- " << tallestRect.width << ":" << tallestRect.height << endl; Rect cropRect(0, tallestRect.y - Y_OFFSET, tallestRect.width, tallestRect.height); @@ -128,16 +170,16 @@ int main( int argc, const char** argv ) Mat cropped(characterImg, cropRect); cvtColor(cropped, cropped, CV_BGR2GRAY); - Rect destinationRect(xPos + (CHAR_HORIZ_OFFSET - tallestRect.width), yPos + (CHAR_VERT_OFFSET - tallestRect.height), tallestRect.width, tallestRect.height); + Rect destinationRect(xPos + (CHAR_HORIZ_OFFSET - TILE_WIDTH), yPos + (CHAR_VERT_OFFSET - TILE_HEIGHT + (TILE_HEIGHT - tallestRect.height)), tallestRect.width, tallestRect.height); //cout << "1" << endl; cropped.copyTo(bigTif(destinationRect)); - int x1= destinationRect.x - 2; - int y1 = (vertical_resolution - destinationRect.y - destinationRect.height) - 2; - int x2 = (destinationRect.x + destinationRect.width) + 2; - int y2 = (vertical_resolution - destinationRect.y) + 2; + int x1 = destinationRect.x - CHAR_PADDING_HORIZONTAL; + int y1 = (vertical_resolution - destinationRect.y - destinationRect.height) - CHAR_PADDING_VERTICAL; + int x2 = (destinationRect.x + destinationRect.width) + CHAR_PADDING_HORIZONTAL; + int y2 = (vertical_resolution - destinationRect.y) + CHAR_PADDING_VERTICAL; //0 70 5602 85 5636 0 boxFileOut << charcode << " " << x1 << " " << y1 << " "; boxFileOut << x2 << " " << y2 ;