Ignore the space (' ') character whenever Tesseract returns it as a recognized symbol

This commit is contained in:
Matt Hill
2015-06-04 22:53:55 -04:00
parent bb3edc1b06
commit dbe338deb1

View File

@@ -53,6 +53,8 @@ namespace alpr
void OCR::performOCR(PipelineData* pipeline_data) void OCR::performOCR(PipelineData* pipeline_data)
{ {
const int SPACE_CHAR_CODE = 32;
timespec startTime; timespec startTime;
getTimeMonotonic(&startTime); getTimeMonotonic(&startTime);
@@ -83,13 +85,14 @@ namespace alpr
{ {
const char* symbol = ri->GetUTF8Text(level); const char* symbol = ri->GetUTF8Text(level);
float conf = ri->Confidence(level); float conf = ri->Confidence(level);
bool dontcare; bool dontcare;
int fontindex = 0; int fontindex = 0;
int pointsize = 0; int pointsize = 0;
const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex); const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex);
if(symbol != 0 && pointsize >= config->ocrMinFontSize) // Ignore NULL pointers, spaces, and characters that are way too small to be valid
if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize)
{ {
postProcessor.addLetter(string(symbol), j, conf); postProcessor.addLetter(string(symbol), j, conf);