Abstracted OCR function to run from base class

2025-10-05 21:37:10 +08:00 · 2016-07-02 10:29:27 -04:00
parent e3c88620b3
commit e4acb17c52
4 changed files with 117 additions and 77 deletions
--- a/src/openalpr/ocr/ocr.cpp
+++ b/src/openalpr/ocr/ocr.cpp
@@ -30,4 +30,33 @@ namespace alpr
  OCR::~OCR() {
  }

+  
+  void OCR::performOCR(PipelineData* pipeline_data)
+  {
+    
+    timespec startTime;
+    getTimeMonotonic(&startTime);
+
+    segment(pipeline_data);
+    
+    postProcessor.clear();
+
+
+    int absolute_charpos = 0;
+    for (unsigned int line_idx = 0; line_idx < pipeline_data->charRegions.size(); line_idx++)
+    {
+      std::vector<OcrChar> chars = recognize_line(line_idx, pipeline_data);
+      
+      for (uint32_t i = 0; i < chars.size(); i++)
+        postProcessor.addLetter(chars[i].letter, line_idx, chars[i].char_index, chars[i].confidence);
+    }
+    
+
+    if (config->debugTiming)
+    {
+      timespec endTime;
+      getTimeMonotonic(&endTime);
+      std::cout << "OCR Time: " << diffclock(startTime, endTime) << "ms." << std::endl;
+    }
+  }
 }
--- a/src/openalpr/ocr/ocr.h
+++ b/src/openalpr/ocr/ocr.h
@@ -25,16 +25,26 @@

 namespace alpr
 {
+  struct OcrChar
+  {
+    std::string letter;
+    int char_index;
+    float confidence;
+  };
+  
  class OCR {
  public:
    OCR(Config* config);
    virtual ~OCR();

-    virtual void performOCR(PipelineData* pipeline_data)=0;
+    void performOCR(PipelineData* pipeline_data);

    PostProcess postProcessor;

  protected:
+    virtual std::vector<OcrChar> recognize_line(int line_index, PipelineData* pipeline_data)=0;
+    virtual void segment(PipelineData* pipeline_data)=0;
+    
    Config* config;

  };
--- a/src/openalpr/ocr/tesseract_ocr.cpp
+++ b/src/openalpr/ocr/tesseract_ocr.cpp
@@ -20,6 +20,8 @@
 #include "tesseract_ocr.h"
 #include "config.h"

+#include "segmentation/charactersegmenter.h"
+
 using namespace std;
 using namespace cv;
 using namespace tesseract;
@@ -52,25 +54,11 @@ namespace alpr
    tesseract.End();
  }
  
-  void TesseractOcr::performOCR(PipelineData* pipeline_data)
-  {
+  std::vector<OcrChar> TesseractOcr::recognize_line(int line_idx, PipelineData* pipeline_data) {
+
    const int SPACE_CHAR_CODE = 32;
    
-    timespec startTime;
-    getTimeMonotonic(&startTime);
-
-    postProcessor.clear();
-
-    // Don't waste time on OCR processing if it is impossible to get sufficient characters
-    int total_char_spaces = 0;
-    for (unsigned int i = 0; i < pipeline_data->charRegions.size(); i++)
-      total_char_spaces += pipeline_data->charRegions[i].size();
-    if (total_char_spaces < config->postProcessMinCharacters)
-    {
-      pipeline_data->disqualify_reason = "Insufficient character boxes detected.  No OCR performed.";
-      pipeline_data->disqualified = true;
-      return;
-    }
+    std::vector<OcrChar> recognized_chars;
    
    for (unsigned int i = 0; i < pipeline_data->thresholds.size(); i++)
    {
@@ -80,77 +68,90 @@ namespace alpr
                          pipeline_data->thresholds[i].size().width, pipeline_data->thresholds[i].size().height, 
                          pipeline_data->thresholds[i].channels(), pipeline_data->thresholds[i].step1());

+ 
      int absolute_charpos = 0;
-      for (unsigned int line_idx = 0; line_idx < pipeline_data->charRegions.size(); line_idx++)
+
+      for (unsigned int j = 0; j < pipeline_data->charRegions[line_idx].size(); j++)
      {
-        for (unsigned int j = 0; j < pipeline_data->charRegions[line_idx].size(); j++)
+        Rect expandedRegion = expandRect( pipeline_data->charRegions[line_idx][j], 2, 2, pipeline_data->thresholds[i].cols, pipeline_data->thresholds[i].rows) ;
+
+        tesseract.SetRectangle(expandedRegion.x, expandedRegion.y, expandedRegion.width, expandedRegion.height);
+        tesseract.Recognize(NULL);
+
+        tesseract::ResultIterator* ri = tesseract.GetIterator();
+        tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;
+        do
        {
-          Rect expandedRegion = expandRect( pipeline_data->charRegions[line_idx][j], 2, 2, pipeline_data->thresholds[i].cols, pipeline_data->thresholds[i].rows) ;
+          const char* symbol = ri->GetUTF8Text(level);
+          float conf = ri->Confidence(level);

-          tesseract.SetRectangle(expandedRegion.x, expandedRegion.y, expandedRegion.width, expandedRegion.height);
-          tesseract.Recognize(NULL);
+          bool dontcare;
+          int fontindex = 0;
+          int pointsize = 0;
+          const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex);

-          tesseract::ResultIterator* ri = tesseract.GetIterator();
-          tesseract::PageIteratorLevel level = tesseract::RIL_SYMBOL;
-          do
+          // Ignore NULL pointers, spaces, and characters that are way too small to be valid
+          if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize)
          {
-            const char* symbol = ri->GetUTF8Text(level);
-            float conf = ri->Confidence(level);
-
-            bool dontcare;
-            int fontindex = 0;
-            int pointsize = 0;
-            const char* fontName = ri->WordFontAttributes(&dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &dontcare, &pointsize, &fontindex);
-
-            // Ignore NULL pointers, spaces, and characters that are way too small to be valid
-            if(symbol != 0 && symbol[0] != SPACE_CHAR_CODE && pointsize >= config->ocrMinFontSize)
-            {
-              postProcessor.addLetter(string(symbol), line_idx, absolute_charpos, conf);
-
-              if (this->config->debugOcr)
-                printf("charpos%d line%d: threshold %d:  symbol %s, conf: %f font: %s (index %d) size %dpx", absolute_charpos, line_idx, i, symbol, conf, fontName, fontindex, pointsize);
-
-              bool indent = false;
-              tesseract::ChoiceIterator ci(*ri);
-              do
-              {
-                const char* choice = ci.GetUTF8Text();
-                //1/17/2016 adt adding check to avoid double adding same character if ci is same as symbol. Otherwise first choice from ResultsIterator will get added twice when choiceIterator run.
-                if (string(symbol) != string(choice))
-                  postProcessor.addLetter(string(choice), line_idx, absolute_charpos, ci.Confidence());
-                else
-                {
-                  // Explictly double-adding the first character.  This leads to higher accuracy right now, likely because other sections of code
-                  // have expected it and compensated. 
-                  // TODO: Figure out how to remove this double-counting of the first letter without impacting accuracy
-                  postProcessor.addLetter(string(choice), line_idx, absolute_charpos, ci.Confidence());
-                }
-                if (this->config->debugOcr)
-                {
-                  if (indent) printf("\t\t ");
-                  printf("\t- ");
-                  printf("%s conf: %f\n", choice, ci.Confidence());
-                }
-
-                indent = true;
-              }
-              while(ci.Next());
-              
-            }
+            OcrChar c;
+            c.char_index = absolute_charpos;
+            c.confidence = conf;
+            c.letter = string(symbol);
+            recognized_chars.push_back(c);

            if (this->config->debugOcr)
-              printf("---------------------------------------------\n");
+              printf("charpos%d line%d: threshold %d:  symbol %s, conf: %f font: %s (index %d) size %dpx", absolute_charpos, line_idx, i, symbol, conf, fontName, fontindex, pointsize);
+
+            bool indent = false;
+            tesseract::ChoiceIterator ci(*ri);
+            do
+            {
+              const char* choice = ci.GetUTF8Text();
+              
+              OcrChar c2;
+              c2.char_index = absolute_charpos;
+              c2.confidence = ci.Confidence();
+              c2.letter = string(choice);
+              
+              //1/17/2016 adt: adding check to avoid double-adding the same character if the ChoiceIterator choice is the same as symbol. Otherwise the first choice from the ResultIterator would get added twice when the ChoiceIterator runs.
+              if (string(symbol) != string(choice))
+                recognized_chars.push_back(c2);
+              else
+              {
+                // Explicitly double-adding the first character.  This leads to higher accuracy right now, likely because other sections of code
+                // have expected it and compensated. 
+                // TODO: Figure out how to remove this double-counting of the first letter without impacting accuracy
+                recognized_chars.push_back(c2);
+              }
+              if (this->config->debugOcr)
+              {
+                if (indent) printf("\t\t ");
+                printf("\t- ");
+                printf("%s conf: %f\n", choice, ci.Confidence());
+              }
+
+              indent = true;
+            }
+            while(ci.Next());

-            delete[] symbol;
          }
-          while((ri->Next(level)));

-          delete ri;
+          if (this->config->debugOcr)
+            printf("---------------------------------------------\n");

-          absolute_charpos++;
+          delete[] symbol;
        }
+        while((ri->Next(level)));
+
+        delete ri;
+
+        absolute_charpos++;
      }
+      
    }
+    
+    return recognized_chars;
+  }
  void TesseractOcr::segment(PipelineData* pipeline_data) {

    CharacterSegmenter segmenter(pipeline_data);
--- a/src/openalpr/ocr/tesseract_ocr.h
+++ b/src/openalpr/ocr/tesseract_ocr.h
@@ -45,11 +45,11 @@ namespace alpr
      TesseractOcr(Config* config);
      virtual ~TesseractOcr();

-      void performOCR(PipelineData* pipeline_data);


    private:

+      std::vector<OcrChar> recognize_line(int line_index, PipelineData* pipeline_data);
      void segment(PipelineData* pipeline_data);
    
      tesseract::TessBaseAPI tesseract;