diff --git a/src/misc_utilities/CMakeLists.txt b/src/misc_utilities/CMakeLists.txt
index 108fa67..f46cb44 100644
--- a/src/misc_utilities/CMakeLists.txt
+++ b/src/misc_utilities/CMakeLists.txt
@@ -35,7 +35,13 @@ TARGET_LINK_LIBRARIES(openalpr-utils-prepcharsfortraining
support
${OpenCV_LIBS}
)
-
+
+ADD_EXECUTABLE( openalpr-utils-binarizefontsheet binarizefontsheet.cpp )  # OCR-training helper: splits font sheet images into per-character images
+TARGET_LINK_LIBRARIES(openalpr-utils-binarizefontsheet
+ openalpr
+ support
+ ${OpenCV_LIBS}
+ )
ADD_EXECUTABLE( openalpr-utils-tagplates tagplates.cpp )
TARGET_LINK_LIBRARIES(openalpr-utils-tagplates
diff --git a/src/misc_utilities/binarizefontsheet.cpp b/src/misc_utilities/binarizefontsheet.cpp
new file mode 100644
index 0000000..5acf523
--- /dev/null
+++ b/src/misc_utilities/binarizefontsheet.cpp
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2015 New Designs Unlimited, LLC
+ * Opensource Automated License Plate Recognition [http://www.openalpr.com]
+ *
+ * This file is part of OpenAlpr.
+ *
+ * OpenAlpr is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License
+ * version 3 as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc/imgproc.hpp"
+
+#include
+#include
+#include
+#include
+#include "../tclap/CmdLine.h"
+#include "utility.h"
+#include "support/utf8.h"
+
+// Namespaces used unqualified throughout this file (standard library, OpenCV, OpenALPR).
+using namespace std;
+using namespace cv;
+using namespace alpr;
+
+
+
+// This utility operates on a large image file (a "font sheet") generated from a TTF font file.
+// The font sheet is used to train OCR. The process is:
+//   1. Find the exact TTF font used by the number plates.
+//   2. Generate text with all of the characters that could be on a license plate.
+//   3. Print the pages on a piece of paper.
+//   4. Take pictures with a digital camera under different lighting conditions to add realistic noise.
+//   5. Binarize and process each character with OpenALPR morphology functions to make them look similar to how OpenALPR sees them. Produce a tif file.
+//   6. Produce a box file based on the text from #2 and the image from #5.
+//   7. Train OCR with this box/tif data.
+
+
+// This utility is used before "prepcharsfortraining"
+// Given a series of images (font sheets) and a text file giving the order, it will pluck out each character, binarize it,
+// and output it to a single image file for each character.
+
+// These characters can later be reassembled into a tif/box file using prepcharsfortraining.
+
+
+
+
+
+// Comparator for std::sort that puts rectangles in reading order: top-to-bottom, then left-to-right.
+// Rectangles whose top edges are at most 15 pixels apart are considered to be on the same text line.
+// NOTE(review): because of that tolerance "same line" is not transitive, so this is not a strict
+// weak ordering in general; it relies on the text rows being clearly separated.
+bool sort_lined_rectangles(Rect i, Rect j) {
+  const int vertical_gap = abs(i.y - j.y);
+  const bool same_line = (vertical_gap <= 15);
+
+  // Same line: the left-most rectangle goes first.  Different lines: the upper one goes first.
+  return same_line ? (i.x < j.x) : (i.y < j.y);
+}
+
+// Shows a preview of `img` with every rectangle outlined in red and labelled with its index,
+// scaled to a fixed height of 1000 pixels, and hands the result to drawAndWait().
+// `img` is converted with CV_GRAY2BGR, so it is expected to be a single-channel image.
+void show_debug_image(const vector<Rect>& rectangles, const Mat& img)
+{
+  Mat debugImg;
+  cvtColor(img, debugImg, CV_GRAY2BGR);
+  for (unsigned int i = 0; i < rectangles.size(); i++)
+  {
+    const Rect& mr = rectangles[i];
+    rectangle(debugImg, mr, Scalar(0,0,255), 2);
+    putText(debugImg, toString(i), mr.tl(), FONT_HERSHEY_PLAIN, 1.3, Scalar(0,0,0), 2);
+  }
+  const float new_height = 1000;
+  const float scale = static_cast<float>(debugImg.rows) / new_height;  // shrink factor that keeps the aspect ratio
+  resize(debugImg, debugImg, Size(static_cast<int>(debugImg.cols / scale), static_cast<int>(new_height)));
+  drawAndWait(&debugImg);
+}
+
+// Entry point: cuts every font sheet image into one binarized image file per character.
+// Usage: openalpr-utils-binarizefontsheet --character_file <file> --out_dir <dir> <font_sheet>...
+// Returns 0 on success; 1 on bad arguments, missing or unreadable inputs, or invalid UTF-8.
+int main(int argc, char** argv) {
+
+  const int MIN_RECTANGLE_AREA_PIXELS = 500;  // blobs with a smaller bounding box are not treated as characters
+  const int MIN_SPECKLE_AREA_PIXELS = 20;     // contours with a smaller bounding box are erased as noise
+  const int BLOBBER_EROSION_SIZE = 6;         // radius of the square kernel used for the dilate/erode pass
+
+  vector<string> font_sheet_files;
+  string char_list_file;
+  string out_dir;
+
+  TCLAP::CmdLine cmd("OpenAlpr OCR Training Font Sheet Prep Utility", ' ', "1.0.0");
+
+  TCLAP::UnlabeledMultiArg<std::string> fontSheetArg( "font_sheet", "List of font sheet images", true, "", "font_sheet" );
+  TCLAP::ValueArg<std::string> charListArg("","character_file","Text file with the text/order of the individual characters in the font sheets",true, "" ,"character_file");
+  TCLAP::ValueArg<std::string> outDirArg("","out_dir","Output directory to put the character images",true, "" ,"output_dir");
+
+  try
+  {
+    cmd.add( fontSheetArg );
+    cmd.add( charListArg );
+    cmd.add( outDirArg );
+
+    if (cmd.parse( argc, argv ) == false)
+    {
+      // Error occurred while parsing. Exit now.
+      return 1;
+    }
+
+    font_sheet_files = fontSheetArg.getValue();
+    char_list_file = charListArg.getValue();
+    out_dir = outDirArg.getValue();
+  }
+  catch (TCLAP::ArgException &e) // catch any exceptions
+  {
+    std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl;
+    return 1;
+  }
+
+  if (DirectoryExists(out_dir.c_str()) == false)
+  {
+    cout << "Output dir: " << out_dir << " does not exist" << endl;
+    return 1;
+  }
+
+  if (fileExists(char_list_file.c_str()) == false)
+  {
+    cout << "Character text file: " << char_list_file << " does not exist" << endl;
+    return 1;
+  }
+
+  // Verify all the font sheet files exist
+  for (unsigned int i = 0; i < font_sheet_files.size(); i++)
+  {
+    if (fileExists(font_sheet_files[i].c_str()) == false)
+    {
+      cout << "Font sheet image: " << font_sheet_files[i] << " does not exist." << endl;
+      return 1;
+    }
+  }
+
+  // Read the text content from the character list file
+  std::ifstream fs(char_list_file.c_str());
+  std::string text_content((std::istreambuf_iterator<char>(fs)),
+                           std::istreambuf_iterator<char>());
+  fs.close();
+
+  // The character list is identical for every sheet and threshold, so validate and measure it once,
+  // before any image is processed or any debug window is shown.
+  if (utf8::find_invalid(text_content.begin(), text_content.end()) != text_content.end())
+  {
+    cout << "Invalid UTF-8 encoding detected " << endl;
+    return 1;
+  }
+  const size_t text_content_length = static_cast<size_t>(utf8::distance(text_content.begin(), text_content.end()));
+
+  // One code point is left out of the expected glyph count, as in the original comparison.
+  // NOTE(review): presumably this is the file's trailing newline -- confirm against the character files in use.
+  const size_t expected_char_count = (text_content_length > 0) ? text_content_length - 1 : 0;
+
+  for (unsigned int font_sheet_index = 0; font_sheet_index < font_sheet_files.size(); font_sheet_index++)
+  {
+    cout << "Processing: " << font_sheet_files[font_sheet_index] << endl;
+
+    Mat frame = cv::imread( font_sheet_files[font_sheet_index] );
+    if (frame.empty())
+    {
+      // imread() yields an empty Mat when the file cannot be decoded; cvtColor() below would throw on it.
+      cout << "Font sheet image: " << font_sheet_files[font_sheet_index] << " could not be read." << endl;
+      return 1;
+    }
+
+    Config config("us");
+
+    cvtColor(frame, frame, CV_BGR2GRAY);
+    vector<Mat> thresholds = produceThresholds(frame, &config);
+
+    for (unsigned int t = 0; t < thresholds.size(); t++)
+    {
+      // First clean up any tiny speckles.  findContours() is given a copy, leaving thresholds[t] for the drawing below.
+      Mat speckle_copy = thresholds[t].clone();
+
+      vector<vector<Point> > speckle_contours;
+      vector<Vec4i> speckle_hierarchy;
+      findContours(speckle_copy, speckle_contours, speckle_hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
+      Mat testImg = Mat::zeros(thresholds[t].size(), thresholds[t].type());
+
+      for (unsigned int i = 0; i < speckle_contours.size(); i++)
+      {
+        Rect speckleRect = boundingRect(speckle_contours[i]);
+
+        if (speckleRect.area() < MIN_SPECKLE_AREA_PIXELS)
+        {
+          // Black the speckle out of the threshold and paint it white in the preview image.
+          drawContours(thresholds[t], speckle_contours, i, Scalar(0,0,0), CV_FILLED);
+          drawContours(testImg, speckle_contours, i, Scalar(255,255,255), CV_FILLED);
+        }
+      }
+      resize(testImg, testImg, Size(700, 1000));
+      drawAndWait(&testImg);
+
+      // Adjust the threshold w/ the morphology operation that OpenALPR uses
+      Mat closureElement = getStructuringElement( MORPH_CROSS,
+                                                  Size( 2 + 1, 2+1 ),
+                                                  Point( 1, 1 ) );
+      morphologyEx(thresholds[t], thresholds[t], MORPH_CLOSE, closureElement);
+
+      // Dilate then erode with a large square kernel; the contours of the result give one box per blob.
+      Mat blobby;
+      Mat element = getStructuringElement( MORPH_RECT,
+                                           Size( 2*BLOBBER_EROSION_SIZE + 1, 2*BLOBBER_EROSION_SIZE+1 ),
+                                           Point( BLOBBER_EROSION_SIZE, BLOBBER_EROSION_SIZE ) );
+      dilate(thresholds[t], blobby, element );
+      erode(blobby, blobby, element);
+
+      vector<vector<Point> > contours;
+      vector<Vec4i> hierarchy;
+      findContours(blobby, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE);
+
+      // Invert the threshold; the character images are cropped from the inverted image.
+      bitwise_not(thresholds[t], thresholds[t]);
+
+      vector<Rect> rectangles;
+      for (unsigned int i = 0; i < contours.size(); i++)
+      {
+        Rect mr = boundingRect(contours[i]);
+
+        if (mr.area() >= MIN_RECTANGLE_AREA_PIXELS)
+          rectangles.push_back(mr);
+      }
+      // sort the rectangles top to bottom left to right
+      std::sort(rectangles.begin(), rectangles.end(), sort_lined_rectangles);
+
+      show_debug_image(rectangles, thresholds[t]);
+      if (rectangles.size() != expected_char_count)
+      {
+        cout << "Number of blobs (" << rectangles.size() << ") != number of characters (" << expected_char_count << ")" << endl;
+        cout << "Skipping..." << endl;
+        continue;
+      }
+
+      string::iterator utf_iterator = text_content.begin();
+
+      for (unsigned int i = 0; i < rectangles.size(); i++)
+      {
+        Rect mr = rectangles[i];
+        Mat croppedChar = thresholds[t](mr);
+
+        int cp = utf8::next(utf_iterator, text_content.end());
+        stringstream ss;
+        ss << out_dir << "/" << utf8chr(cp) << "-" << font_sheet_index << "-" << t << "-" << i << ".png";
+
+        if (imwrite(ss.str(), croppedChar) == false)
+          cout << "Failed to write: " << ss.str() << endl;
+      }
+      show_debug_image(rectangles, thresholds[t]);
+    }
+  }
+
+  return 0;
+}
+