Supporting Unicode characters in classifychars OCR training utility

This commit is contained in:
Matt Hill
2015-05-08 22:30:32 -04:00
parent a8499e3d0d
commit e95ce76c02

View File

@@ -24,6 +24,7 @@
#include <stdio.h> #include <stdio.h>
#include <sys/stat.h> #include <sys/stat.h>
#include "postprocess/regexrule.h"
#include "licenseplatecandidate.h" #include "licenseplatecandidate.h"
#include "stateidentifier.h" #include "stateidentifier.h"
#include "utility.h" #include "utility.h"
@@ -52,6 +53,7 @@ const int DASHBOARD_COLUMNS = 9;
const int LEFT_ARROW_KEY = 81; const int LEFT_ARROW_KEY = 81;
const int RIGHT_ARROW_KEY = 83; const int RIGHT_ARROW_KEY = 83;
const int SPACE_KEY = 32; const int SPACE_KEY = 32;
const string SPACE = " ";
const int ENTER_KEY = 10; const int ENTER_KEY = 10;
const int ESCAPE_KEY = 27; const int ESCAPE_KEY = 27;
@@ -62,7 +64,7 @@ const int DASHBOARD_COLUMNS = 3;
#endif #endif
void showDashboard(vector<Mat> images, vector<bool> selectedImages, int selectedIndex); void showDashboard(vector<Mat> images, vector<bool> selectedImages, int selectedIndex);
vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state); vector<string> showCharSelection(Mat image, vector<Rect> charRegions, string state);
int main( int argc, const char** argv ) int main( int argc, const char** argv )
{ {
@@ -157,10 +159,10 @@ int main( int argc, const char** argv )
int curDashboardSelection = 0; int curDashboardSelection = 0;
vector<char> humanInputs(pipeline_data.charRegions.size()); vector<string> humanInputs(pipeline_data.charRegions.size());
for (int z = 0; z < pipeline_data.charRegions.size(); z++) for (int z = 0; z < pipeline_data.charRegions.size(); z++)
humanInputs[z] = ' '; humanInputs[z] = SPACE;
showDashboard(pipeline_data.thresholds, selectedBoxes, 0); showDashboard(pipeline_data.thresholds, selectedBoxes, 0);
@@ -196,7 +198,7 @@ int main( int argc, const char** argv )
{ {
if (pipeline_data.charRegions.size() > 0) if (pipeline_data.charRegions.size() > 0)
{ {
vector<char> tempdata = showCharSelection(pipeline_data.thresholds[curDashboardSelection], pipeline_data.charRegions, statecodestr); vector<string> tempdata = showCharSelection(pipeline_data.thresholds[curDashboardSelection], pipeline_data.charRegions, statecodestr);
for (int c = 0; c < pipeline_data.charRegions.size(); c++) for (int c = 0; c < pipeline_data.charRegions.size(); c++)
humanInputs[c] = tempdata[c]; humanInputs[c] = tempdata[c];
} }
@@ -210,15 +212,8 @@ int main( int argc, const char** argv )
selectedBoxes[curDashboardSelection] = !selectedBoxes[curDashboardSelection]; selectedBoxes[curDashboardSelection] = !selectedBoxes[curDashboardSelection];
showDashboard(pipeline_data.thresholds, selectedBoxes, curDashboardSelection); showDashboard(pipeline_data.thresholds, selectedBoxes, curDashboardSelection);
} }
else if (waitkey == 's' || waitkey == 'S' || waitkey == 'W') else if (waitkey == 's' || waitkey == 'S' )
{ {
if (waitkey == 'W')
{
selectedBoxes[curDashboardSelection] = true;
showDashboard(pipeline_data.thresholds, selectedBoxes, curDashboardSelection);
const std::string& ocr_str = ocr.postProcessor.bestChars;
humanInputs.assign(ocr_str.begin(), ocr_str.end());
}
bool somethingSelected = false; bool somethingSelected = false;
bool chardataTagged = false; bool chardataTagged = false;
@@ -232,7 +227,7 @@ int main( int argc, const char** argv )
} }
for (int c = 0; c < pipeline_data.charRegions.size(); c++) for (int c = 0; c < pipeline_data.charRegions.size(); c++)
{ {
if (humanInputs[c] != ' ') if (humanInputs[c] != SPACE)
{ {
chardataTagged = true; chardataTagged = true;
break; break;
@@ -243,7 +238,7 @@ int main( int argc, const char** argv )
{ {
for (int c = 0; c < pipeline_data.charRegions.size(); c++) for (int c = 0; c < pipeline_data.charRegions.size(); c++)
{ {
if (humanInputs[c] == ' ') if (humanInputs[c] == SPACE)
continue; continue;
for (int t = 0; t < pipeline_data.thresholds.size(); t++) for (int t = 0; t < pipeline_data.thresholds.size(); t++)
@@ -314,15 +309,17 @@ void showDashboard(vector<Mat> images, vector<bool> selectedImages, int selected
imshow("Selection dashboard", dashboard); imshow("Selection dashboard", dashboard);
} }
vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state) vector<string> showCharSelection(Mat image, vector<Rect> charRegions, string state)
{ {
int curCharIdx = 0; int curCharIdx = 0;
vector<char> humanInputs(charRegions.size()); vector<string> humanInputs(charRegions.size());
for (int i = 0; i < charRegions.size(); i++) for (int i = 0; i < charRegions.size(); i++)
humanInputs[i] = (char) SPACE_KEY; humanInputs[i] = SPACE;
char waitkey = (char) waitKey(50); RegexRule regex_rule("", "[\\p{Digit}\\p{Alpha}]");
int16_t waitkey = waitKey(50);
while (waitkey != ENTER_KEY && waitkey != ESCAPE_KEY) while (waitkey != ENTER_KEY && waitkey != ESCAPE_KEY)
{ {
Mat imgCopy(image.size(), image.type()); Mat imgCopy(image.size(), image.type());
@@ -337,15 +334,15 @@ vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state
curCharIdx--; curCharIdx--;
else if (waitkey == RIGHT_ARROW_KEY ) else if (waitkey == RIGHT_ARROW_KEY )
curCharIdx++; curCharIdx++;
else if ((waitkey >= '0' && waitkey <= '9') || (waitkey >= 'a' && waitkey <= 'z') || waitkey == SPACE_KEY) else if (waitkey > 0 && regex_rule.match(utf8chr(waitkey))) // Verify that it's an actual character
{ {
// Save the character to disk // Save the character to disk
humanInputs[curCharIdx] = toupper((char) waitkey); humanInputs[curCharIdx] = utf8chr(waitkey);
curCharIdx++; curCharIdx++;
if (curCharIdx >= charRegions.size()) if (curCharIdx >= charRegions.size())
{ {
waitkey = (char) ENTER_KEY; waitkey = ENTER_KEY;
break; break;
} }
} }
@@ -355,7 +352,7 @@ vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state
if (curCharIdx >= charRegions.size()) if (curCharIdx >= charRegions.size())
curCharIdx = charRegions.size() -1; curCharIdx = charRegions.size() -1;
waitkey = (char) waitKey(50); waitkey = waitKey(50);
} }
if (waitkey == ENTER_KEY) if (waitkey == ENTER_KEY)
@@ -363,7 +360,7 @@ vector<char> showCharSelection(Mat image, vector<Rect> charRegions, string state
// Save all the inputs // Save all the inputs
for (int i = 0; i < charRegions.size(); i++) for (int i = 0; i < charRegions.size(); i++)
{ {
if (humanInputs[i] != (char) SPACE_KEY) if (humanInputs[i] != SPACE)
cout << "Tagged " << state << " char code: '" << humanInputs[i] << "' at char position: " << i << endl; cout << "Tagged " << state << " char code: '" << humanInputs[i] << "' at char position: " << i << endl;
} }
} }