Added support for Oniguruma and UTF-8

2025-10-06 04:06:49 +08:00 · 2015-05-03 14:02:06 -04:00
parent be5c3b307c
commit c5324258f4
5 changed files with 226 additions and 116 deletions
--- a/src/openalpr/postprocess/postprocess.cpp
+++ b/src/openalpr/postprocess/postprocess.cpp
@@ -457,80 +457,4 @@ namespace alpr
    return true;
  }
  // Empty destructor: this implementation performs no explicit cleanup.
  RegexRule::~RegexRule() {}
  // Translates a plate pattern into a regular expression and compiles it.
  //
  // Pattern units handled here:
  //   [...]  copied verbatim and counted as a single unit
  //   ?      becomes '.', and its position is remembered as a skip position
  //   @      becomes "\a"
  //   #      becomes "\d"
  // Any other character adds nothing to the regex but is still counted in
  // numchars. An unterminated '[' walks pattern.at() past the end of the string.
  RegexRule::RegexRule(string region, string pattern)
  {
    this->original = pattern;
    this->region = region;
    numchars = 0;

    for (int pos = 0; pos < pattern.size(); pos++)
    {
      switch (pattern.at(pos))
      {
        case '[':
          // Copy everything up to (not including) the closing bracket, then close it.
          for (; pattern.at(pos) != ']'; pos++)
            this->regex += pattern.at(pos);
          this->regex += ']';
          break;

        case '?':
          this->regex += '.';
          this->skipPositions.push_back(numchars);
          break;

        case '@':
          this->regex += "\\a";
          break;

        case '#':
          this->regex += "\\d";
          break;

        default:
          break;
      }

      numchars++;
    }

    trexp.Compile(this->regex.c_str());

    // Debug aid:
    //cout << "AA " << this->region << ": " << original << " regex: " << regex << endl;
    //for (int z = 0; z < this->skipPositions.size(); z++)
    //  cout << "AA Skip position: " << skipPositions[z] << endl;
  }
  // True when `text` has exactly numchars characters and the compiled
  // expression accepts it; the regex is not consulted on a length mismatch.
  bool RegexRule::match(string text)
  {
    const bool lengthMatches = (text.length() == numchars);
    return lengthMatches && trexp.Match(text.c_str());
  }
  // Returns a copy of `text` without the characters whose index is listed in
  // skipPositions (the positions recorded for '?' pattern units).
  string RegexRule::filterSkips(string text)
  {
    string kept;

    for (int pos = 0; pos < text.size(); pos++)
    {
      // Stop scanning the skip list as soon as this index is found in it.
      bool isSkipped = false;
      for (int k = 0; k < skipPositions.size() && !isSkipped; k++)
        isSkipped = (skipPositions[k] == pos);

      if (!isSkipped)
        kept += text[pos];
    }

    return kept;
  }
 }
--- a/src/openalpr/postprocess/postprocess.h
+++ b/src/openalpr/postprocess/postprocess.h
@@ -20,7 +20,7 @@
 #ifndef OPENALPR_POSTPROCESS_H
 #define OPENALPR_POSTPROCESS_H
-#include "support/regex/oniguruma.h"
+#include "regexrule.h"
 #include "constants.h"
 #include "utility.h"
 #include <fstream>
@@ -53,22 +53,6 @@ namespace alpr
  // Pairwise comparison helpers over PPResult and Letter values. Their
  // definitions are outside this view; presumably used as ordering predicates
  // -- confirm against the callers.
  bool wordCompare( const PPResult &left, const PPResult &right );
  bool letterCompare( const Letter &left, const Letter &right );
  // A single plate-pattern rule for one region: the pattern is translated to a
  // regular expression at construction time and can then be tested against
  // candidate plate strings.
  class RegexRule
  {
    public:
      // Builds the rule from a region name and a pattern string.
      RegexRule(std::string region, std::string pattern);
      // Returns true when `text` has the expected length and satisfies the regex.
      bool match(std::string text);
      // Returns `text` with the characters at the recorded skip positions removed.
      std::string filterSkips(std::string text);
    private:
      // Number of pattern units counted by the constructor; match() rejects
      // text whose length differs.
      int numchars;
      // Compiled form of `regex`.
      TRexpp trexp;
      // Pattern exactly as passed to the constructor.
      std::string original;
      // Regular expression generated from the pattern.
      std::string regex;
      // Region name as passed to the constructor.
      std::string region;
      // Unit indices of '?' placeholders, consumed by filterSkips().
      std::vector<int> skipPositions;
  };
  class PostProcess
  {
--- a/src/openalpr/postprocess/regexrule.cpp
+++ b/src/openalpr/postprocess/regexrule.cpp
@@ -1,20 +1,169 @@
 /*
- * File:   regexrule.cpp
+ * Copyright (c) 2015 New Designs Unlimited, LLC
- * Author: mhill
+ * Opensource Automated License Plate Recognition [http://www.openalpr.com]
 *
- * Created on April 28, 2015, 6:28 AM
+ * This file is part of OpenAlpr.
- */
+ *
 * OpenAlpr is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License
 * version 3 as published by the Free Software Foundation
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 #include "regexrule.h"
 using namespace std;
 namespace alpr
 {
-  RegexRule::RegexRule() {
+  RegexRule::RegexRule(string region, string pattern)
  {
    // Translates a UTF-8 plate pattern into an Oniguruma regular expression.
    //
    // Pattern units (each counted once in numchars unless noted):
    //   [...]  copied verbatim as one unit
    //   \      copied verbatim and NOT counted (escapes the following unit)
    //   ?      becomes '.', and its index is recorded in skipPositions
    //   @      becomes \p{Alpha}
    //   #      becomes \p{Digit}
    //   * +    rejected with a message; not added to the regex but still counted
    //   other  copied verbatim
    //
    // `valid` stays false when the pattern is not valid UTF-8 or the regex
    // fails to compile.
    this->original = pattern;
    this->region = region;
    this->valid = false;
    // Give these a defined value before any early return: the destructor passes
    // onig_regex to onig_free(), so it must never hold an indeterminate pointer.
    this->onig_regex = NULL;
    this->numchars = 0;

    string::iterator end_it = utf8::find_invalid(pattern.begin(), pattern.end());
    if (end_it != pattern.end()) {
      cerr << "Invalid UTF-8 encoding detected " << endl;
      return;
    }

    string::iterator utf_iterator = pattern.begin();
    while (utf_iterator < pattern.end())
    {
      int cp = utf8::next(utf_iterator, pattern.end());
      string utf_character = utf8chr(cp);

      if (utf_character == "[")
      {
        this->regex = this->regex + "[";
        while (utf_character != "]" )
        {
          if (utf_iterator >= pattern.end())
            break; // Invalid regex, don't bother processing

          int class_cp = utf8::next(utf_iterator, pattern.end());
          utf_character = utf8chr(class_cp);
          this->regex = this->regex + utf_character;
        }
      }
      else if (utf_character == "\\")
      {
        // Don't add "\" characters to our character count
        this->regex = this->regex + utf_character;
        continue;
      }
      else if (utf_character == "?")
      {
        this->regex = this->regex + '.';
        this->skipPositions.push_back(numchars);
      }
      else if (utf_character == "@")
      {
        this->regex = this->regex + "\\p{Alpha}";
      }
      else if (utf_character == "#")
      {
        this->regex = this->regex + "\\p{Digit}";
      }
      else if ((utf_character == "*") || (utf_character == "+"))
      {
        // The character is left out of the regex but still counted below.
        cerr << "Regex with wildcards (* or +) not supported" << endl;
      }
      else
      {
        this->regex = this->regex + utf_character;
      }

      numchars++;
    }

    UChar* cstr_pattern = (UChar* )this->regex.c_str();
    OnigErrorInfo einfo;
    //cout << "Pattern: " << cstr_pattern << endl;
    // Use the string's own length rather than strlen() so the end pointer
    // always covers the whole generated expression.
    int r = onig_new(&onig_regex, cstr_pattern, cstr_pattern + this->regex.length(),
      ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);

    if (r != ONIG_NORMAL) {
      // Report the library's own explanation alongside the offending pattern.
      UChar error_text[ONIG_MAX_ERROR_MESSAGE_LEN];
      onig_error_code_to_str(error_text, r, &einfo);
      cerr << "Unable to load regex: " << pattern << " (" << (char* )error_text << ")" << endl;
    }
    else
    {
      this->valid = true;
    }
  }
-  RegexRule::~RegexRule() {
+  RegexRule::~RegexRule()
  {
    // Releases the compiled expression owned by this rule.
    //
    // The constructor sets `valid` only after onig_new() succeeded; on its
    // invalid-UTF-8 early return onig_regex is never written, so freeing it
    // unconditionally would pass an indeterminate pointer to onig_free().
    if (this->valid)
      onig_free(onig_regex);

    // onig_end() is intentionally not called here: it shuts down library-wide
    // state, and the Oniguruma API notes forbid using regex objects created
    // before that call. Invoking it from one rule's destructor would therefore
    // invalidate every other RegexRule that is still alive.
  }
  // Tests `text` against this rule.
  //
  // Returns false when the rule failed to build, when `text` is not valid
  // UTF-8, or when its code-point count differs from numchars. Otherwise
  // returns true only if the expression, anchored at the start, consumes the
  // whole string.
  bool RegexRule::match(string text)
  {
    if (!this->valid)
      return false;

    if (utf8::find_invalid(text.begin(), text.end()) != text.end())
    {
      cerr << "Invalid UTF-8 encoding detected " << endl;
      return false;
    }

    const int codepointCount = utf8::distance(text.begin(), text.end());
    if (codepointCount != numchars)
      return false;

    UChar* subject = (UChar* )text.c_str();
    UChar* subjectEnd = subject + strlen((char* )subject);

    // The capture region is required by the call but its contents are unused.
    OnigRegion* captures = onig_region_new();
    const int matchedBytes = onig_match(onig_regex, subject, subjectEnd, subject, captures, ONIG_OPTION_NONE);
    onig_region_free(captures, 1);

    // onig_match reports the matched byte length; demand a full-length match.
    return matchedBytes == text.length();
  }
  // Returns `text` with every character at a recorded skip position removed.
  //
  // skipPositions holds code-point indices (the constructor counts pattern
  // units while decoding with utf8::next), so the text is walked one UTF-8
  // code point at a time and whole characters are kept or dropped. Indexing
  // by byte, as before, removed the wrong bytes for multi-byte characters.
  // For pure ASCII input the result is unchanged. Input that is not valid
  // UTF-8 falls back to the previous one-byte-per-position behaviour.
  string RegexRule::filterSkips(string text)
  {
    const bool valid_utf8 = utf8::find_invalid(text.begin(), text.end()) == text.end();

    string response = "";
    int char_index = 0;
    string::iterator it = text.begin();
    while (it != text.end())
    {
      string::iterator char_start = it;
      if (valid_utf8)
        utf8::next(it, text.end());  // step over one complete code point
      else
        ++it;                        // undecodable input: one byte per position

      bool skip = false;
      for (size_t j = 0; j < skipPositions.size(); j++)
      {
        if (skipPositions[j] == char_index)
        {
          skip = true;
          break;
        }
      }

      if (!skip)
        response.append(char_start, it);

      char_index++;
    }

    return response;
  }
 }
--- a/src/openalpr/postprocess/regexrule.h
+++ b/src/openalpr/postprocess/regexrule.h
@@ -1,21 +1,52 @@
 /*
- * File:   regexrule.h
+ * Copyright (c) 2015 New Designs Unlimited, LLC
- * Author: mhill
+ * Opensource Automated License Plate Recognition [http://www.openalpr.com]
 *
- * Created on April 28, 2015, 6:28 AM
+ * This file is part of OpenAlpr.
- */
+ *
 * OpenAlpr is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License
 * version 3 as published by the Free Software Foundation
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 #ifndef OPENALPR_REGEXRULE_H
 #define	OPENALPR_REGEXRULE_H
 #include <iostream>
 #include <string>
 #include <cstring>
 #include <vector>
 #include "support/regex/oniguruma.h"
 #include "support/utf8.h"
 namespace alpr
 {
-  class RegexRule {
+  class RegexRule
  {
    // One plate-pattern rule for a region, backed by an Oniguruma regex.
    //
    // NOTE(review): the destructor frees onig_regex and no copy constructor or
    // assignment operator is declared, so copying an instance would release
    // the same handle twice -- confirm callers only hold rules by pointer.
    public:
-    RegexRule();
      // Builds the rule from a region name and a UTF-8 pattern string.
+      RegexRule(std::string region, std::string pattern);
      virtual ~RegexRule();

      // Kept public (a stray `private:` label used to precede these two), as
      // in the earlier declaration of this class, so callers can use them.

      // Returns true when `text` satisfies this rule.
      bool match(std::string text);
      // Returns `text` with the characters at the recorded skip positions removed.
      std::string filterSkips(std::string text);
    private:
      // True only when the pattern was valid UTF-8 and the regex compiled.
      bool valid;
      // Number of pattern units counted by the constructor; match() rejects
      // text with a different code-point count.
      int numchars;
      // Compiled expression created by onig_new() in the constructor.
      regex_t* onig_regex;
      // Pattern exactly as passed to the constructor.
      std::string original;
      // Regular expression generated from the pattern.
      std::string regex;
      // Region name as passed to the constructor.
      std::string region;
      // Unit indices of '?' placeholders, consumed by filterSkips().
      std::vector<int> skipPositions;
  };
 }
--- a/src/openalpr/support/CMakeLists.txt
+++ b/src/openalpr/support/CMakeLists.txt
@@ -5,10 +5,32 @@ set(support_source_files
 timing.cpp
 tinythread.cpp
 platform.cpp
 utf8.cpp
 )
-add_subdirectory(regex)
+set(regex_source_files
 # C sources of the regex library under regex/, listed explicitly so they are
 # compiled straight into the `support` target instead of a separate
 # subdirectory build.
 regex/regsyntax.c
 regex/regposerr.c
 regex/regcomp.c
 regex/reggnu.c
 regex/regerror.c
 regex/regext.c
 regex/regversion.c
 regex/regparse.c
 regex/regenc.c
 regex/st.c
 regex/regposix.c
 regex/regexec.c
 regex/regtrav.c
 # Encoding tables compiled in: ASCII, Unicode and UTF-8.
 regex/ascii.c
 regex/unicode.c
 regex/utf8.c
 )
-add_library(support STATIC ${support_source_files})
+
 # Build a single static `support` library from both source lists.
 add_library(support STATIC 
  ${support_source_files} 
  ${regex_source_files}
 )
 # -fPIC requests position-independent code (GCC/Clang flag); presumably so the
 # static library can be linked into a shared library -- confirm.
 SET_TARGET_PROPERTIES( support PROPERTIES COMPILE_FLAGS -fPIC)