diff --git a/src/openalpr/ocr/ocr.cpp b/src/openalpr/ocr/ocr.cpp index ebfe95b..6d63ae1 100644 --- a/src/openalpr/ocr/ocr.cpp +++ b/src/openalpr/ocr/ocr.cpp @@ -30,4 +30,33 @@ namespace alpr OCR::~OCR() { } + + void OCR::performOCR(PipelineData* pipeline_data) + { + + timespec startTime; + getTimeMonotonic(&startTime); + + segment(pipeline_data); + + postProcessor.clear(); + + + int absolute_charpos = 0; + for (unsigned int line_idx = 0; line_idx < pipeline_data->charRegions.size(); line_idx++) + { + std::vector chars = recognize_line(line_idx, pipeline_data); + + for (uint32_t i = 0; i < chars.size(); i++) + postProcessor.addLetter(chars[i].letter, line_idx, chars[i].char_index, chars[i].confidence); + } + + + if (config->debugTiming) + { + timespec endTime; + getTimeMonotonic(&endTime); + std::cout << "OCR Time: " << diffclock(startTime, endTime) << "ms." << std::endl; + } + } } \ No newline at end of file diff --git a/src/openalpr/ocr/ocr.h b/src/openalpr/ocr/ocr.h index 291caf8..be53b66 100644 --- a/src/openalpr/ocr/ocr.h +++ b/src/openalpr/ocr/ocr.h @@ -25,16 +25,26 @@ namespace alpr { + struct OcrChar + { + std::string letter; + int char_index; + float confidence; + }; + class OCR { public: OCR(Config* config); virtual ~OCR(); - virtual void performOCR(PipelineData* pipeline_data)=0; + void performOCR(PipelineData* pipeline_data); PostProcess postProcessor; protected: + virtual std::vector recognize_line(int line_index, PipelineData* pipeline_data)=0; + virtual void segment(PipelineData* pipeline_data)=0; + Config* config; }; diff --git a/src/openalpr/ocr/tesseract_ocr.cpp b/src/openalpr/ocr/tesseract_ocr.cpp index a239d97..55eeaf6 100644 --- a/src/openalpr/ocr/tesseract_ocr.cpp +++ b/src/openalpr/ocr/tesseract_ocr.cpp @@ -20,6 +20,8 @@ #include "tesseract_ocr.h" #include "config.h" +#include "segmentation/charactersegmenter.h" + using namespace std; using namespace cv; using namespace tesseract; @@ -51,27 +53,13 @@ namespace alpr { tesseract.End(); } + + std::vector TesseractOcr::recognize_line(int line_idx, PipelineData* pipeline_data) { - void TesseractOcr::performOCR(PipelineData* pipeline_data) - { const int SPACE_CHAR_CODE = 32; - timespec startTime; - getTimeMonotonic(&startTime); - - postProcessor.clear(); - - // Don't waste time on OCR processing if it is impossible to get sufficient characters - int total_char_spaces = 0; - for (unsigned int i = 0; i < pipeline_data->charRegions.size(); i++) - total_char_spaces += pipeline_data->charRegions[i].size(); - if (total_char_spaces < config->postProcessMinCharacters) - { - pipeline_data->disqualify_reason = "Insufficient character boxes detected. No OCR performed."; - pipeline_data->disqualified = true; - return; - } - + std::vector recognized_chars; + for (unsigned int i = 0; i < pipeline_data->thresholds.size(); i++) { // Make it black text on white background @@ -80,77 +68,90 @@ namespace alpr pipeline_data->thresholds[i].size().width, pipeline_data->thresholds[i].size().height, pipeline_data->thresholds[i].channels(), pipeline_data->thresholds[i].step1()); + int absolute_charpos = 0; - for (unsigned int line_idx = 0; line_idx < pipeline_data->charRegions.size(); line_idx++) + + for (unsigned int j = 0; j < pipeline_data->charRegions[line_idx].size(); j++) { - for (unsigned int j = 0; j < pipeline_data->charRegions[line_idx].size(); j++) + Rect expandedRegion = expandRect( pipeline_data->charRegions[line_idx][j], 2, 2, pipeline_data->thresholds[i].cols, pipeline_data->thresholds[i].rows) ; + + tesseract.SetRectangle(expandedRegion.x, expandedRegion.y, expandedRegion.width, expandedRegion.height); + tesseract.Recognize(NULL); + + tesseract::ResultIterator* ri = tesseract.GetIterator(); + tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL; + do { - Rect expandedRegion = expandRect( pipeline_data->charRegions[line_idx][j], 2, 2, pipeline_data->thresholds[i].cols, pipeline_data->thresholds[i].rows) ; + const char* symbol = ri->GetUTF8Text(level); + float conf = ri->Confidence(level); - tesseract.SetRectangle(expandedRegion.x, expandedRegion.y, expandedRegion.width, expandedRegion.height); - tesseract.Recognize(NULL); + bool dontcare; + int fontindex = 0; + int pointsize = 0; + const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex); - tesseract::ResultIterator* ri = tesseract.GetIterator(); - tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL; - do + // Ignore NULL pointers, spaces, and characters that are way too small to be valid + if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize) { - const char* symbol = ri->GetUTF8Text(level); - float conf = ri->Confidence(level); - - bool dontcare; - int fontindex = 0; - int pointsize = 0; - const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex); - - // Ignore NULL pointers, spaces, and characters that are way too small to be valid - if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize) - { - postProcessor.addLetter(string(symbol), line_idx, absolute_charpos, conf); - - if (this->config->debugOcr) - printf("charpos%d line%d: threshold %d: symbol %s, conf: %f font: %s (index %d) size %dpx", absolute_charpos, line_idx, i, symbol, conf, fontName, fontindex, pointsize); - - bool indent = false; - tesseract::ChoiceIterator ci(*ri); - do - { - const char* choice = ci.GetUTF8Text(); - //1/17/2016 adt adding check to avoid double adding same character if ci is same as symbol. Otherwise first choice from ResultsIterator will get added twice when choiceIterator run. - if (string(symbol) != string(choice)) - postProcessor.addLetter(string(choice), line_idx, absolute_charpos, ci.Confidence()); - else - { - // Explictly double-adding the first character. This leads to higher accuracy right now, likely because other sections of code - // have expected it and compensated. - // TODO: Figure out how to remove this double-counting of the first letter without impacting accuracy - postProcessor.addLetter(string(choice), line_idx, absolute_charpos, ci.Confidence()); - } - if (this->config->debugOcr) - { - if (indent) printf("\t\t "); - printf("\t- "); - printf("%s conf: %f\n", choice, ci.Confidence()); - } - - indent = true; - } - while(ci.Next()); - - } + OcrChar c; + c.char_index = absolute_charpos; + c.confidence = conf; + c.letter = string(symbol); + recognized_chars.push_back(c); if (this->config->debugOcr) - printf("---------------------------------------------\n"); + printf("charpos%d line%d: threshold %d: symbol %s, conf: %f font: %s (index %d) size %dpx", absolute_charpos, line_idx, i, symbol, conf, fontName, fontindex, pointsize); + + bool indent = false; + tesseract::ChoiceIterator ci(*ri); + do + { + const char* choice = ci.GetUTF8Text(); + + OcrChar c2; + c2.char_index = absolute_charpos; + c2.confidence = ci.Confidence(); + c2.letter = string(choice); + + //1/17/2016 adt adding check to avoid double adding same character if ci is same as symbol. Otherwise first choice from ResultsIterator will get added twice when choiceIterator run. + if (string(symbol) != string(choice)) + recognized_chars.push_back(c2); + else + { + // Explictly double-adding the first character. This leads to higher accuracy right now, likely because other sections of code + // have expected it and compensated. + // TODO: Figure out how to remove this double-counting of the first letter without impacting accuracy + recognized_chars.push_back(c2); + } + if (this->config->debugOcr) + { + if (indent) printf("\t\t "); + printf("\t- "); + printf("%s conf: %f\n", choice, ci.Confidence()); + } + + indent = true; + } + while(ci.Next()); - delete[] symbol; } - while((ri->Next(level))); - delete ri; - - absolute_charpos++; + if (this->config->debugOcr) + printf("---------------------------------------------\n"); + + delete[] symbol; } + while((ri->Next(level))); + + delete ri; + + absolute_charpos++; } + } + + return recognized_chars; + } void TesseractOcr::segment(PipelineData* pipeline_data) { CharacterSegmenter segmenter(pipeline_data); diff --git a/src/openalpr/ocr/tesseract_ocr.h b/src/openalpr/ocr/tesseract_ocr.h index 42807e8..f3f1974 100644 --- a/src/openalpr/ocr/tesseract_ocr.h +++ b/src/openalpr/ocr/tesseract_ocr.h @@ -45,11 +45,11 @@ namespace alpr TesseractOcr(Config* config); virtual ~TesseractOcr(); - void performOCR(PipelineData* pipeline_data); private: + std::vector recognize_line(int line_index, PipelineData* pipeline_data); void segment(PipelineData* pipeline_data); tesseract::TessBaseAPI tesseract;