Updated OCR training utility to work better with Tesseract

This commit is contained in:
Matt Hill
2014-06-02 20:51:04 -04:00
parent 3dfadd2455
commit 39ecafaa90

View File

@@ -57,25 +57,45 @@ int main( int argc, const char** argv )
if (DirectoryExists(inDir.c_str()))
{
const int X_OFFSET = 10;
const int Y_OFFSET = 10;
const int CHAR_PADDING_HORIZONTAL = 0;
const int CHAR_PADDING_VERTICAL = 0;
const int X_OFFSET = 5;
const int Y_OFFSET = 5;
const int PAGE_MARGIN_X = 70;
const int PAGE_MARGIN_Y = 70;
const int HORIZONTAL_RESOLUTION = 3500;
const int MAX_VERTICAL_RESOLUTION = 6000; // Maximum vertical size before chopping into additional pages.
const int TILE_WIDTH = 55;
const int TILE_WIDTH = 25;
const int TILE_HEIGHT = 60;
const int CHAR_HORIZ_OFFSET = 40;
const int TILE_HEIGHT = 70;
const int CHAR_VERT_OFFSET = 48;
const int FIXED_CHAR_HEIGHT = 40; // RESIZE all characters to this height
vector<string> files = getFilesInDir(inDir.c_str());
sort( files.begin(), files.end(), stringCompare );
for (int i = 0; i< files.size(); i++)
{
if (hasEnding(files[i], ".png") || hasEnding(files[i], ".jpg"))
{
}
else
{
std::cerr << "Non-image file detected in this directory. This must be removed first" << std::endl;
return 1;
}
}
int tiles_per_row = ((float) (HORIZONTAL_RESOLUTION - (PAGE_MARGIN_X * 2))) / ((float) TILE_WIDTH);
int lines = files.size() / (tiles_per_row);
int vertical_resolution = (lines * TILE_HEIGHT) + (PAGE_MARGIN_Y * 2) ;
int vertical_resolution = (lines * TILE_HEIGHT) + (PAGE_MARGIN_Y * 3) ;
cout << tiles_per_row << " : " << vertical_resolution << endl;
Mat bigTif = Mat::zeros(Size(HORIZONTAL_RESOLUTION, vertical_resolution), CV_8U);
@@ -100,6 +120,8 @@ int main( int argc, const char** argv )
char charcode = files[i][0];
Mat characterImg = imread(fullpath);
Mat charImgCopy = Mat::zeros(Size(150, 150), characterImg.type());
bitwise_not(charImgCopy, charImgCopy);
@@ -112,14 +134,34 @@ int main( int argc, const char** argv )
//imshow("copy", charImgCopy);
findContours(charImgCopy, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
Rect tallestRect(0, 0, 0, 0);
float minHeightPercent = 0.35;
int minHeight = (int) (((float) characterImg.rows) * minHeightPercent);
vector<Rect> tallEnoughRects;
for (int c = 0; c < contours.size(); c++)
{
Rect tmpRect = boundingRect(contours[c]);
if (tmpRect.height > tallestRect.height)
tallestRect = tmpRect;
if (tmpRect.height > minHeight)
tallEnoughRects.push_back( tmpRect );
}
int xMin = 9999999, xMax = 0, yMin = 9999999, yMax = 0;
// Combine all the "tall enough" rectangles into one super rectangle
for (int r = 0; r < tallEnoughRects.size(); r++)
{
if (tallEnoughRects[r].x < xMin)
xMin = tallEnoughRects[r].x;
if (tallEnoughRects[r].y < yMin)
yMin = tallEnoughRects[r].y;
if (tallEnoughRects[r].x + tallEnoughRects[r].width > xMax)
xMax = tallEnoughRects[r].x + tallEnoughRects[r].width;
if (tallEnoughRects[r].y + tallEnoughRects[r].height > yMax)
yMax = tallEnoughRects[r].y + tallEnoughRects[r].height;
}
Rect tallestRect(xMin, yMin, xMax - xMin, yMax - yMin);
//cout << tallestRect.x << ":" << tallestRect.y << " -- " << tallestRect.width << ":" << tallestRect.height << endl;
Rect cropRect(0, tallestRect.y - Y_OFFSET, tallestRect.width, tallestRect.height);
@@ -128,16 +170,16 @@ int main( int argc, const char** argv )
Mat cropped(characterImg, cropRect);
cvtColor(cropped, cropped, CV_BGR2GRAY);
Rect destinationRect(xPos + (CHAR_HORIZ_OFFSET - tallestRect.width), yPos + (CHAR_VERT_OFFSET - tallestRect.height), tallestRect.width, tallestRect.height);
Rect destinationRect(xPos + (CHAR_HORIZ_OFFSET - TILE_WIDTH), yPos + (CHAR_VERT_OFFSET - TILE_HEIGHT + (TILE_HEIGHT - tallestRect.height)), tallestRect.width, tallestRect.height);
//cout << "1" << endl;
cropped.copyTo(bigTif(destinationRect));
int x1= destinationRect.x - 2;
int y1 = (vertical_resolution - destinationRect.y - destinationRect.height) - 2;
int x2 = (destinationRect.x + destinationRect.width) + 2;
int y2 = (vertical_resolution - destinationRect.y) + 2;
int x1 = destinationRect.x - CHAR_PADDING_HORIZONTAL;
int y1 = (vertical_resolution - destinationRect.y - destinationRect.height) - CHAR_PADDING_VERTICAL;
int x2 = (destinationRect.x + destinationRect.width) + CHAR_PADDING_HORIZONTAL;
int y2 = (vertical_resolution - destinationRect.y) + CHAR_PADDING_VERTICAL;
//0 70 5602 85 5636 0
boxFileOut << charcode << " " << x1 << " " << y1 << " ";
boxFileOut << x2 << " " << y2 ;