From c5324258f44976d9125ff112611cc013030c1b87 Mon Sep 17 00:00:00 2001 From: Matt Hill Date: Sun, 3 May 2015 14:02:06 -0400 Subject: [PATCH] Added oniguruma and utf8 support to the support library --- src/openalpr/postprocess/postprocess.cpp | 76 ---------- src/openalpr/postprocess/postprocess.h | 20 +-- src/openalpr/postprocess/regexrule.cpp | 169 +++++++++++++++++++++-- src/openalpr/postprocess/regexrule.h | 51 +++++-- src/openalpr/support/CMakeLists.txt | 26 +++- 5 files changed, 226 insertions(+), 116 deletions(-) diff --git a/src/openalpr/postprocess/postprocess.cpp b/src/openalpr/postprocess/postprocess.cpp index 70fde0c..989e31b 100644 --- a/src/openalpr/postprocess/postprocess.cpp +++ b/src/openalpr/postprocess/postprocess.cpp @@ -457,80 +457,4 @@ namespace alpr return true; } - RegexRule::~RegexRule() - { - - } - - RegexRule::RegexRule(string region, string pattern) - { - this->original = pattern; - this->region = region; - - numchars = 0; - for (int i = 0; i < pattern.size(); i++) - { - if (pattern.at(i) == '[') - { - while (pattern.at(i) != ']' ) - { - this->regex = this->regex + pattern.at(i); - i++; - } - this->regex = this->regex + ']'; - } - else if (pattern.at(i) == '?') - { - this->regex = this->regex + '.'; - this->skipPositions.push_back(numchars); - } - else if (pattern.at(i) == '@') - { - this->regex = this->regex + "\\a"; - } - else if (pattern.at(i) == '#') - { - this->regex = this->regex + "\\d"; - } - - numchars++; - } - - trexp.Compile(this->regex.c_str()); - - //cout << "AA " << this->region << ": " << original << " regex: " << regex << endl; - //for (int z = 0; z < this->skipPositions.size(); z++) - // cout << "AA Skip position: " << skipPositions[z] << endl; - } - - bool RegexRule::match(string text) - { - if (text.length() != numchars) - return false; - - return trexp.Match(text.c_str()); - } - - string RegexRule::filterSkips(string text) - { - string response = ""; - for (int i = 0; i < text.size(); i++) - { - bool skip = false; - for (int j 
= 0; j < skipPositions.size(); j++) - { - if (skipPositions[j] == i) - { - skip = true; - break; - } - } - - if (skip == false) - response = response + text[i]; - } - - return response; - } - } \ No newline at end of file diff --git a/src/openalpr/postprocess/postprocess.h b/src/openalpr/postprocess/postprocess.h index f87a886..7760b07 100644 --- a/src/openalpr/postprocess/postprocess.h +++ b/src/openalpr/postprocess/postprocess.h @@ -20,7 +20,7 @@ #ifndef OPENALPR_POSTPROCESS_H #define OPENALPR_POSTPROCESS_H -#include "support/regex/oniguruma.h" +#include "regexrule.h" #include "constants.h" #include "utility.h" #include @@ -53,23 +53,7 @@ namespace alpr bool wordCompare( const PPResult &left, const PPResult &right ); bool letterCompare( const Letter &left, const Letter &right ); - class RegexRule - { - public: - RegexRule(std::string region, std::string pattern); - - bool match(std::string text); - std::string filterSkips(std::string text); - - private: - int numchars; - TRexpp trexp; - std::string original; - std::string regex; - std::string region; - std::vector skipPositions; - }; - + class PostProcess { public: diff --git a/src/openalpr/postprocess/regexrule.cpp b/src/openalpr/postprocess/regexrule.cpp index 06ea6c3..2e3cad4 100644 --- a/src/openalpr/postprocess/regexrule.cpp +++ b/src/openalpr/postprocess/regexrule.cpp @@ -1,21 +1,170 @@ -/* - * File: regexrule.cpp - * Author: mhill - * - * Created on April 28, 2015, 6:28 AM - */ +/* + * Copyright (c) 2015 New Designs Unlimited, LLC + * Opensource Automated License Plate Recognition [http://www.openalpr.com] + * + * This file is part of OpenAlpr. 
+ * + * OpenAlpr is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License + * version 3 as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . +*/ #include "regexrule.h" +using namespace std; + namespace alpr { - - RegexRule::RegexRule() { - } + + RegexRule::RegexRule(string region, string pattern) + { + this->original = pattern; + this->region = region; + this->valid = false; + string::iterator end_it = utf8::find_invalid(pattern.begin(), pattern.end()); + if (end_it != pattern.end()) { + cerr << "Invalid UTF-8 encoding detected " << endl; + return; + } + + string::iterator utf_iterator = pattern.begin(); + numchars = 0; + while (utf_iterator < pattern.end()) + { + int cp = utf8::next(utf_iterator, pattern.end()); + + string utf_character = utf8chr(cp); + + if (utf_character == "[") + { + this->regex = this->regex + "["; + + while (utf_character != "]" ) + { + if (utf_iterator >= pattern.end()) + break; // Invalid regex, don't bother processing + int cp = utf8::next(utf_iterator, pattern.end()); - RegexRule::~RegexRule() { + utf_character = utf8chr(cp); + this->regex = this->regex + utf_character; + } + + } + else if (utf_character == "\\") + { + // Don't add "\" characters to our character count + this->regex = this->regex + utf_character; + continue; + } + else if (utf_character == "?") + { + this->regex = this->regex + '.'; + this->skipPositions.push_back(numchars); + } + else if (utf_character == "@") + { + this->regex = this->regex + "\\p{Alpha}"; + } + else if (utf_character == "#") + { + this->regex = this->regex + "\\p{Digit}"; 
+ } + else if ((utf_character == "*") || (utf_character == "+")) + { + cerr << "Regex with wildcards (* or +) not supported" << endl; + } + else + { + this->regex = this->regex + utf_character; + } + + numchars++; + } + + UChar* cstr_pattern = (UChar* )this->regex.c_str(); + OnigErrorInfo einfo; + + //cout << "Pattern: " << cstr_pattern << endl; + int r = onig_new(&onig_regex, cstr_pattern, cstr_pattern + strlen((char* )cstr_pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); + + if (r != ONIG_NORMAL) { + //char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + //onig_error_code_to_str(s, r, &einfo); + cerr << "Unable to load regex: " << pattern << endl; + } + else + { + this->valid = true; + } } + + RegexRule::~RegexRule() + { + onig_free(onig_regex); + onig_end(); + } + + bool RegexRule::match(string text) + { + if (!this->valid) + return false; + + string::iterator end_it = utf8::find_invalid(text.begin(), text.end()); + if (end_it != text.end()) { + cerr << "Invalid UTF-8 encoding detected " << endl; + return false; + } + + int text_char_length = utf8::distance(text.begin(), text.end()); + + if (text_char_length != numchars) + return false; + + OnigRegion *region = onig_region_new(); + unsigned char *start, *end; + UChar* cstr_text = (UChar* )text.c_str(); + end = cstr_text + strlen((char* )cstr_text); + start = cstr_text; + + int match = onig_match(onig_regex, cstr_text, end, start, region, ONIG_OPTION_NONE); + + onig_region_free(region, 1); + + return match == text.length(); + } + + string RegexRule::filterSkips(string text) + { + string response = ""; + for (int i = 0; i < text.size(); i++) + { + bool skip = false; + for (int j = 0; j < skipPositions.size(); j++) + { + if (skipPositions[j] == i) + { + skip = true; + break; + } + } + + if (skip == false) + response = response + text[i]; + } + + return response; + } + } diff --git a/src/openalpr/postprocess/regexrule.h b/src/openalpr/postprocess/regexrule.h index 4afcb30..89a2e55 100644 --- 
a/src/openalpr/postprocess/regexrule.h +++ b/src/openalpr/postprocess/regexrule.h @@ -1,21 +1,52 @@ -/* - * File: regexrule.h - * Author: mhill +/* + * Copyright (c) 2015 New Designs Unlimited, LLC + * Opensource Automated License Plate Recognition [http://www.openalpr.com] * - * Created on April 28, 2015, 6:28 AM - */ + * This file is part of OpenAlpr. + * + * OpenAlpr is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License + * version 3 as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . +*/ #ifndef OPENALPR_REGEXRULE_H #define OPENALPR_REGEXRULE_H +#include +#include +#include +#include +#include "support/regex/oniguruma.h" +#include "support/utf8.h" + namespace alpr { - class RegexRule { - public: - RegexRule(); - virtual ~RegexRule(); - private: + class RegexRule + { + public: + RegexRule(std::string region, std::string pattern); + virtual ~RegexRule(); + bool match(std::string text); + std::string filterSkips(std::string text); + + private: + bool valid; + + int numchars; + regex_t* onig_regex; + std::string original; + std::string regex; + std::string region; + std::vector skipPositions; }; } diff --git a/src/openalpr/support/CMakeLists.txt b/src/openalpr/support/CMakeLists.txt index 6ea731c..f2db822 100644 --- a/src/openalpr/support/CMakeLists.txt +++ b/src/openalpr/support/CMakeLists.txt @@ -5,10 +5,32 @@ set(support_source_files timing.cpp tinythread.cpp platform.cpp + utf8.cpp ) -add_subdirectory(regex) +set(regex_source_files +regex/regsyntax.c +regex/regposerr.c +regex/regcomp.c +regex/reggnu.c +regex/regerror.c 
+regex/regext.c +regex/regversion.c +regex/regparse.c +regex/regenc.c +regex/st.c +regex/regposix.c +regex/regexec.c +regex/regtrav.c +regex/ascii.c +regex/unicode.c +regex/utf8.c +) -add_library(support STATIC ${support_source_files}) + +add_library(support STATIC + ${support_source_files} + ${regex_source_files} +) SET_TARGET_PROPERTIES( support PROPERTIES COMPILE_FLAGS -fPIC) \ No newline at end of file