Abstracted OCR function to run from base class

This commit is contained in:
Matt Hill
2016-07-02 10:29:27 -04:00
parent e3c88620b3
commit e4acb17c52
4 changed files with 117 additions and 77 deletions

View File

@@ -30,4 +30,33 @@ namespace alpr
// Destructor with an empty body: no cleanup is performed here.
OCR::~OCR()
{
}
// Runs the complete OCR pass over pipeline_data.
//
// Steps, in order:
//   1. segment(pipeline_data) is invoked.
//   2. postProcessor is cleared.
//   3. recognize_line() is called once per entry of pipeline_data->charRegions,
//      and every OcrChar it returns is handed to postProcessor.addLetter()
//      together with the index of the line it came from.
//   4. If config->debugTiming is set, the value of diffclock(start, end) is
//      printed to stdout, labelled as milliseconds.
//
// segment() and recognize_line() are declared pure virtual in OCR, so the
// actual segmentation and recognition come from the concrete subclass.
void OCR::performOCR(PipelineData* pipeline_data)
{
  timespec startTime;
  getTimeMonotonic(&startTime);

  segment(pipeline_data);

  // Start from an empty post-processor before feeding it this run's letters.
  postProcessor.clear();

  for (unsigned int line_idx = 0; line_idx < pipeline_data->charRegions.size(); line_idx++)
  {
    std::vector<OcrChar> chars = recognize_line(line_idx, pipeline_data);

    // The character position is taken from each OcrChar (char_index); this
    // function keeps no position counter of its own.
    for (uint32_t i = 0; i < chars.size(); i++)
      postProcessor.addLetter(chars[i].letter, line_idx, chars[i].char_index, chars[i].confidence);
  }

  if (config->debugTiming)
  {
    timespec endTime;
    getTimeMonotonic(&endTime);
    std::cout << "OCR Time: " << diffclock(startTime, endTime) << "ms." << std::endl;
  }
}
}

View File

@@ -25,16 +25,26 @@
namespace alpr
{
// One recognized character candidate, as returned by recognize_line() and
// forwarded to postProcessor.addLetter() by OCR::performOCR().
struct OcrChar
{
// Text of the recognized symbol.
std::string letter;
// Character position; passed unchanged as the position argument of addLetter().
int char_index;
// Confidence reported by the recognizer for this candidate.
// NOTE(review): the scale/range is not visible here — confirm against the OCR engine.
float confidence;
};
// Base class for OCR engines. performOCR() drives the run and relies on the
// segment() and recognize_line() hooks declared below.
class OCR {
public:
OCR(Config* config);
virtual ~OCR();
// NOTE(review): performOCR is declared twice below with the same signature
// (once pure virtual, once non-virtual). This looks like the removed and the
// added line of the commit shown together — confirm which one is current;
// both cannot coexist in a real header.
virtual void performOCR(PipelineData* pipeline_data)=0;
void performOCR(PipelineData* pipeline_data);
// Cleared and then fed through addLetter() by performOCR().
PostProcess postProcessor;
protected:
// Hook: returns the characters recognized on the line with the given index.
virtual std::vector<OcrChar> recognize_line(int line_index, PipelineData* pipeline_data)=0;
// Hook: segmentation step that performOCR() invokes before recognition.
virtual void segment(PipelineData* pipeline_data)=0;
// Configuration pointer; performOCR() reads debugTiming from it.
Config* config;
};

View File

@@ -20,6 +20,8 @@
#include "tesseract_ocr.h"
#include "config.h"
#include "segmentation/charactersegmenter.h"
using namespace std;
using namespace cv;
using namespace tesseract;
@@ -52,25 +54,11 @@ namespace alpr
tesseract.End();
}
void TesseractOcr::performOCR(PipelineData* pipeline_data)
{
std::vector<OcrChar> TesseractOcr::recognize_line(int line_idx, PipelineData* pipeline_data) {
const int SPACE_CHAR_CODE = 32;
timespec startTime;
getTimeMonotonic(&startTime);
postProcessor.clear();
// Don't waste time on OCR processing if it is impossible to get sufficient characters
int total_char_spaces = 0;
for (unsigned int i = 0; i < pipeline_data->charRegions.size(); i++)
total_char_spaces += pipeline_data->charRegions[i].size();
if (total_char_spaces < config->postProcessMinCharacters)
{
pipeline_data->disqualify_reason = "Insufficient character boxes detected. No OCR performed.";
pipeline_data->disqualified = true;
return;
}
std::vector<OcrChar> recognized_chars;
for (unsigned int i = 0; i < pipeline_data->thresholds.size(); i++)
{
@@ -80,77 +68,90 @@ namespace alpr
pipeline_data->thresholds[i].size().width, pipeline_data->thresholds[i].size().height,
pipeline_data->thresholds[i].channels(), pipeline_data->thresholds[i].step1());
int absolute_charpos = 0;
for (unsigned int line_idx = 0; line_idx < pipeline_data->charRegions.size(); line_idx++)
for (unsigned int j = 0; j < pipeline_data->charRegions[line_idx].size(); j++)
{
for (unsigned int j = 0; j < pipeline_data->charRegions[line_idx].size(); j++)
Rect expandedRegion = expandRect( pipeline_data->charRegions[line_idx][j], 2, 2, pipeline_data->thresholds[i].cols, pipeline_data->thresholds[i].rows) ;
tesseract.SetRectangle(expandedRegion.x, expandedRegion.y, expandedRegion.width, expandedRegion.height);
tesseract.Recognize(NULL);
tesseract::ResultIterator* ri = tesseract.GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;
do
{
Rect expandedRegion = expandRect( pipeline_data->charRegions[line_idx][j], 2, 2, pipeline_data->thresholds[i].cols, pipeline_data->thresholds[i].rows) ;
const char* symbol = ri->GetUTF8Text(level);
float conf = ri->Confidence(level);
tesseract.SetRectangle(expandedRegion.x, expandedRegion.y, expandedRegion.width, expandedRegion.height);
tesseract.Recognize(NULL);
bool dontcare;
int fontindex = 0;
int pointsize = 0;
const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex);
tesseract::ResultIterator* ri = tesseract.GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;
do
// Ignore NULL pointers, spaces, and characters that are way too small to be valid
if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize)
{
const char* symbol = ri->GetUTF8Text(level);
float conf = ri->Confidence(level);
bool dontcare;
int fontindex = 0;
int pointsize = 0;
const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex);
// Ignore NULL pointers, spaces, and characters that are way too small to be valid
if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize)
{
postProcessor.addLetter(string(symbol), line_idx, absolute_charpos, conf);
if (this->config->debugOcr)
printf("charpos%d line%d: threshold %d: symbol %s, conf: %f font: %s (index %d) size %dpx", absolute_charpos, line_idx, i, symbol, conf, fontName, fontindex, pointsize);
bool indent = false;
tesseract::ChoiceIterator ci(*ri);
do
{
const char* choice = ci.GetUTF8Text();
//1/17/2016 adt adding check to avoid double adding same character if ci is same as symbol. Otherwise first choice from ResultsIterator will get added twice when choiceIterator run.
if (string(symbol) != string(choice))
postProcessor.addLetter(string(choice), line_idx, absolute_charpos, ci.Confidence());
else
{
// Explicitly double-adding the first character. This leads to higher accuracy right now, likely because other sections of code
// have expected it and compensated.
// TODO: Figure out how to remove this double-counting of the first letter without impacting accuracy
postProcessor.addLetter(string(choice), line_idx, absolute_charpos, ci.Confidence());
}
if (this->config->debugOcr)
{
if (indent) printf("\t\t ");
printf("\t- ");
printf("%s conf: %f\n", choice, ci.Confidence());
}
indent = true;
}
while(ci.Next());
}
OcrChar c;
c.char_index = absolute_charpos;
c.confidence = conf;
c.letter = string(symbol);
recognized_chars.push_back(c);
if (this->config->debugOcr)
printf("---------------------------------------------\n");
printf("charpos%d line%d: threshold %d: symbol %s, conf: %f font: %s (index %d) size %dpx", absolute_charpos, line_idx, i, symbol, conf, fontName, fontindex, pointsize);
bool indent = false;
tesseract::ChoiceIterator ci(*ri);
do
{
const char* choice = ci.GetUTF8Text();
OcrChar c2;
c2.char_index = absolute_charpos;
c2.confidence = ci.Confidence();
c2.letter = string(choice);
//1/17/2016 adt adding check to avoid double adding same character if ci is same as symbol. Otherwise first choice from ResultsIterator will get added twice when choiceIterator run.
if (string(symbol) != string(choice))
recognized_chars.push_back(c2);
else
{
// Explicitly double-adding the first character. This leads to higher accuracy right now, likely because other sections of code
// have expected it and compensated.
// TODO: Figure out how to remove this double-counting of the first letter without impacting accuracy
recognized_chars.push_back(c2);
}
if (this->config->debugOcr)
{
if (indent) printf("\t\t ");
printf("\t- ");
printf("%s conf: %f\n", choice, ci.Confidence());
}
indent = true;
}
while(ci.Next());
delete[] symbol;
}
while((ri->Next(level)));
delete ri;
if (this->config->debugOcr)
printf("---------------------------------------------\n");
absolute_charpos++;
delete[] symbol;
}
while((ri->Next(level)));
delete ri;
absolute_charpos++;
}
}
return recognized_chars;
}
void TesseractOcr::segment(PipelineData* pipeline_data) {
CharacterSegmenter segmenter(pipeline_data);

View File

@@ -45,11 +45,11 @@ namespace alpr
TesseractOcr(Config* config);
virtual ~TesseractOcr();
void performOCR(PipelineData* pipeline_data);
private:
std::vector<OcrChar> recognize_line(int line_index, PipelineData* pipeline_data);
void segment(PipelineData* pipeline_data);
tesseract::TessBaseAPI tesseract;