Supporting unicode characters in classifychars OCR training utility

This commit is contained in:
Matt Hill
2015-05-08 22:30:32 -04:00
parent a8499e3d0d
commit e95ce76c02

View File

@@ -24,6 +24,7 @@
#include <stdio.h>
#include <sys/stat.h>
#include "postprocess/regexrule.h"
#include "licenseplatecandidate.h"
#include "stateidentifier.h"
#include "utility.h"
@@ -52,6 +53,7 @@ const int DASHBOARD_COLUMNS = 9;
const int LEFT_ARROW_KEY = 81;
const int RIGHT_ARROW_KEY = 83;
const int SPACE_KEY = 32;
const string SPACE = " ";
const int ENTER_KEY = 10;
const int ESCAPE_KEY = 27;
@@ -62,7 +64,7 @@ const int DASHBOARD_COLUMNS = 3;
#endif
void showDashboard(vector<Mat> images, vector<bool> selectedImages, int selectedIndex);
vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state);
vector<string> showCharSelection(Mat image, vector<Rect> charRegions, string state);
int main( int argc, const char** argv )
{
@@ -157,10 +159,10 @@ int main( int argc, const char** argv )
int curDashboardSelection = 0;
vector<char> humanInputs(pipeline_data.charRegions.size());
vector<string> humanInputs(pipeline_data.charRegions.size());
for (int z = 0; z < pipeline_data.charRegions.size(); z++)
humanInputs[z] = ' ';
humanInputs[z] = SPACE;
showDashboard(pipeline_data.thresholds, selectedBoxes, 0);
@@ -196,7 +198,7 @@ int main( int argc, const char** argv )
{
if (pipeline_data.charRegions.size() > 0)
{
vector<char> tempdata = showCharSelection(pipeline_data.thresholds[curDashboardSelection], pipeline_data.charRegions, statecodestr);
vector<string> tempdata = showCharSelection(pipeline_data.thresholds[curDashboardSelection], pipeline_data.charRegions, statecodestr);
for (int c = 0; c < pipeline_data.charRegions.size(); c++)
humanInputs[c] = tempdata[c];
}
@@ -210,15 +212,8 @@ int main( int argc, const char** argv )
selectedBoxes[curDashboardSelection] = !selectedBoxes[curDashboardSelection];
showDashboard(pipeline_data.thresholds, selectedBoxes, curDashboardSelection);
}
else if (waitkey == 's' || waitkey == 'S' || waitkey == 'W')
else if (waitkey == 's' || waitkey == 'S' )
{
if (waitkey == 'W')
{
selectedBoxes[curDashboardSelection] = true;
showDashboard(pipeline_data.thresholds, selectedBoxes, curDashboardSelection);
const std::string& ocr_str = ocr.postProcessor.bestChars;
humanInputs.assign(ocr_str.begin(), ocr_str.end());
}
bool somethingSelected = false;
bool chardataTagged = false;
@@ -232,7 +227,7 @@ int main( int argc, const char** argv )
}
for (int c = 0; c < pipeline_data.charRegions.size(); c++)
{
if (humanInputs[c] != ' ')
if (humanInputs[c] != SPACE)
{
chardataTagged = true;
break;
@@ -243,7 +238,7 @@ int main( int argc, const char** argv )
{
for (int c = 0; c < pipeline_data.charRegions.size(); c++)
{
if (humanInputs[c] == ' ')
if (humanInputs[c] == SPACE)
continue;
for (int t = 0; t < pipeline_data.thresholds.size(); t++)
@@ -314,15 +309,17 @@ void showDashboard(vector<Mat> images, vector<bool> selectedImages, int selected
imshow("Selection dashboard", dashboard);
}
vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state)
vector<string> showCharSelection(Mat image, vector<Rect> charRegions, string state)
{
int curCharIdx = 0;
vector<char> humanInputs(charRegions.size());
vector<string> humanInputs(charRegions.size());
for (int i = 0; i < charRegions.size(); i++)
humanInputs[i] = (char) SPACE_KEY;
humanInputs[i] = SPACE;
char waitkey = (char) waitKey(50);
RegexRule regex_rule("", "[\\p{Digit}\\p{Alpha}]");
int16_t waitkey = waitKey(50);
while (waitkey != ENTER_KEY && waitkey != ESCAPE_KEY)
{
Mat imgCopy(image.size(), image.type());
@@ -337,15 +334,15 @@ vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state
curCharIdx--;
else if (waitkey == RIGHT_ARROW_KEY )
curCharIdx++;
else if ((waitkey >= '0' && waitkey <= '9') || (waitkey >= 'a' && waitkey <= 'z') || waitkey == SPACE_KEY)
else if (waitkey > 0 && regex_rule.match(utf8chr(waitkey))) // Verify that it's an actual character
{
// Save the character to disk
humanInputs[curCharIdx] = toupper((char) waitkey);
humanInputs[curCharIdx] = utf8chr(waitkey);
curCharIdx++;
if (curCharIdx >= charRegions.size())
{
waitkey = (char) ENTER_KEY;
waitkey = ENTER_KEY;
break;
}
}
@@ -355,7 +352,7 @@ vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state
if (curCharIdx >= charRegions.size())
curCharIdx = charRegions.size() -1;
waitkey = (char) waitKey(50);
waitkey = waitKey(50);
}
if (waitkey == ENTER_KEY)
@@ -363,7 +360,7 @@ vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state
// Save all the inputs
for (int i = 0; i < charRegions.size(); i++)
{
if (humanInputs[i] != (char) SPACE_KEY)
if (humanInputs[i] != SPACE)
cout << "Tagged " << state << " char code: '" << humanInputs[i] << "' at char position: " << i << endl;
}
}