Added Oniguruma and UTF-8 support to the support library

This commit is contained in:
Matt Hill
2015-05-03 14:02:06 -04:00
parent be5c3b307c
commit c5324258f4
5 changed files with 226 additions and 116 deletions

View File

@@ -457,80 +457,4 @@ namespace alpr
return true; return true;
} }
// Destructor. The body is empty: no explicit cleanup is performed here.
RegexRule::~RegexRule()
{
}
// Translates a plate pattern into a regular expression and compiles it.
//
// Pattern elements handled:
//   [...]  character class, copied through verbatim; counts as one character
//   ?      becomes '.', and its position is recorded in skipPositions
//   @      becomes \a
//   #      becomes \d
// Any other character adds nothing to the regex but is still counted in
// numchars (the exact text length that match() requires).
//
// region:  stored as-is; not otherwise used here.
// pattern: the plate pattern to translate.
RegexRule::RegexRule(string region, string pattern)
{
  this->original = pattern;
  this->region = region;

  numchars = 0;
  const size_t pattern_length = pattern.size();
  for (size_t i = 0; i < pattern_length; i++)
  {
    const char current = pattern[i];
    if (current == '[')
    {
      // Copy the class up to (not including) the closing bracket.  The bound
      // check keeps an unterminated class from indexing past the pattern,
      // which previously made at() throw std::out_of_range.
      while (i < pattern_length && pattern[i] != ']')
      {
        this->regex = this->regex + pattern[i];
        i++;
      }
      this->regex = this->regex + ']';
    }
    else if (current == '?')
    {
      this->regex = this->regex + '.';
      this->skipPositions.push_back(numchars);
    }
    else if (current == '@')
    {
      this->regex = this->regex + "\\a";
    }
    else if (current == '#')
    {
      this->regex = this->regex + "\\d";
    }

    numchars++;
  }

  trexp.Compile(this->regex.c_str());

  //cout << "AA " << this->region << ": " << original << " regex: " << regex << endl;
  //for (int z = 0; z < this->skipPositions.size(); z++)
  // cout << "AA Skip position: " << skipPositions[z] << endl;
}
// Returns true when `text` has exactly numchars characters and the compiled
// expression matches it.  The regex is only consulted when the length agrees.
bool RegexRule::match(string text)
{
  if (text.length() == numchars)
    return trexp.Match(text.c_str());

  return false;
}
// Returns a copy of `text` with every character whose index appears in
// skipPositions removed.
string RegexRule::filterSkips(string text)
{
  string kept = "";

  for (int pos = 0; pos < text.size(); pos++)
  {
    // Scan the recorded skip positions until one equals this index.
    bool is_skipped = false;
    for (int k = 0; !is_skipped && k < skipPositions.size(); k++)
      is_skipped = (skipPositions[k] == pos);

    if (!is_skipped)
      kept = kept + text[pos];
  }

  return kept;
}
} }

View File

@@ -20,7 +20,7 @@
#ifndef OPENALPR_POSTPROCESS_H #ifndef OPENALPR_POSTPROCESS_H
#define OPENALPR_POSTPROCESS_H #define OPENALPR_POSTPROCESS_H
#include "support/regex/oniguruma.h" #include "regexrule.h"
#include "constants.h" #include "constants.h"
#include "utility.h" #include "utility.h"
#include <fstream> #include <fstream>
@@ -53,22 +53,6 @@ namespace alpr
bool wordCompare( const PPResult &left, const PPResult &right ); bool wordCompare( const PPResult &left, const PPResult &right );
bool letterCompare( const Letter &left, const Letter &right ); bool letterCompare( const Letter &left, const Letter &right );
// A single plate pattern for a region, translated to a regular expression
// when the rule is constructed.
class RegexRule
{
public:
// Translates `pattern` into a regex and compiles it; `region` is stored.
RegexRule(std::string region, std::string pattern);
// True when `text` has exactly numchars characters and matches the regex.
bool match(std::string text);
// Returns `text` with the characters at the recorded skip positions removed.
std::string filterSkips(std::string text);
private:
// Number of pattern elements counted while translating; match() requires
// the text length to equal this.
int numchars;
// Compiled regular expression object.
TRexpp trexp;
// The pattern exactly as passed to the constructor.
std::string original;
// The translated regular expression text.
std::string regex;
// Region passed to the constructor.
std::string region;
// Indices of the '?' elements of the pattern.
std::vector<int> skipPositions;
};
class PostProcess class PostProcess
{ {

View File

@@ -1,20 +1,169 @@
/* /*
* File: regexrule.cpp * Copyright (c) 2015 New Designs Unlimited, LLC
* Author: mhill * Opensource Automated License Plate Recognition [http://www.openalpr.com]
* *
* Created on April 28, 2015, 6:28 AM * This file is part of OpenAlpr.
*/ *
* OpenAlpr is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License
* version 3 as published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "regexrule.h" #include "regexrule.h"
using namespace std;
namespace alpr namespace alpr
{ {
// Builds a UTF-8 Oniguruma regex from a plate pattern and records how many
// characters a matching plate must contain (numchars).
//
// Pattern elements handled, one UTF-8 character at a time:
//   [...]   character class, copied through verbatim; counts as one character
//   \       copied through and NOT counted, so it pairs with what follows
//   ?       becomes '.', and its position is recorded in skipPositions
//   @       becomes \p{Alpha}
//   #       becomes \p{Digit}
//   * or +  reported on cerr as unsupported and left out of the regex
//           (the element is still counted)
//   other   copied through unchanged
//
// If the pattern is not valid UTF-8, or the regex fails to compile, a message
// is written to cerr and the rule stays invalid (valid == false).
RegexRule::RegexRule(string region, string pattern)
{
  this->original = pattern;
  this->region = region;
  this->valid = false;

  // Give every member a defined value before the first early return: the
  // destructor passes onig_regex to onig_free() no matter how construction
  // ended, and match() reads numchars.
  this->numchars = 0;
  this->onig_regex = NULL;

  string::iterator end_it = utf8::find_invalid(pattern.begin(), pattern.end());
  if (end_it != pattern.end())
  {
    cerr << "Invalid UTF-8 encoding detected " << endl;
    return;
  }

  string::iterator utf_iterator = pattern.begin();
  while (utf_iterator < pattern.end())
  {
    int cp = utf8::next(utf_iterator, pattern.end());
    string utf_character = utf8chr(cp);

    if (utf_character == "[")
    {
      this->regex = this->regex + "[";

      // Copy everything up to and including the closing bracket.
      while (utf_character != "]" )
      {
        if (utf_iterator >= pattern.end())
          break; // Invalid regex, don't bother processing

        int class_cp = utf8::next(utf_iterator, pattern.end());
        utf_character = utf8chr(class_cp);
        this->regex = this->regex + utf_character;
      }
    }
    else if (utf_character == "\\")
    {
      // Don't add "\" characters to our character count
      this->regex = this->regex + utf_character;
      continue;
    }
    else if (utf_character == "?")
    {
      this->regex = this->regex + '.';
      this->skipPositions.push_back(numchars);
    }
    else if (utf_character == "@")
    {
      this->regex = this->regex + "\\p{Alpha}";
    }
    else if (utf_character == "#")
    {
      this->regex = this->regex + "\\p{Digit}";
    }
    else if ((utf_character == "*") || (utf_character == "+"))
    {
      cerr << "Regex with wildcards (* or +) not supported" << endl;
    }
    else
    {
      this->regex = this->regex + utf_character;
    }

    numchars++;
  }

  UChar* cstr_pattern = (UChar* )this->regex.c_str();
  OnigErrorInfo einfo;

  //cout << "Pattern: " << cstr_pattern << endl;
  int r = onig_new(&onig_regex, cstr_pattern, cstr_pattern + strlen((char* )cstr_pattern),
                   ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);

  if (r != ONIG_NORMAL)
  {
    //char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    //onig_error_code_to_str(s, r, &einfo);
    cerr << "Unable to load regex: " << pattern << endl;
  }
  else
  {
    this->valid = true;
  }
}
// Releases the compiled regex owned by this rule.
RegexRule::~RegexRule()
{
  // onig_end() is deliberately not called here.  Per the Oniguruma API notes
  // it marks the end of all use of the library, so calling it from one rule's
  // destructor would affect every other RegexRule that still holds a compiled
  // regex.  Library shutdown, if wanted, belongs at program exit.
  onig_free(onig_regex);
}
// Returns true only when the rule compiled successfully, `text` is valid
// UTF-8 with exactly numchars characters, and the regex, applied at the start
// of `text`, consumes the whole string.
bool RegexRule::match(string text)
{
  if (!this->valid)
    return false;

  if (utf8::find_invalid(text.begin(), text.end()) != text.end())
  {
    cerr << "Invalid UTF-8 encoding detected " << endl;
    return false;
  }

  // Length is compared in UTF-8 characters, not bytes.
  const int char_count = utf8::distance(text.begin(), text.end());
  if (char_count != numchars)
    return false;

  UChar* subject = (UChar* )text.c_str();
  UChar* subject_end = subject + strlen((char* )subject);

  OnigRegion* captures = onig_region_new();
  const int matched_bytes = onig_match(onig_regex, subject, subject_end, subject,
                                       captures, ONIG_OPTION_NONE);
  onig_region_free(captures, 1);

  // A full match reports the byte length of the text.
  return matched_bytes == text.length();
}
// Returns a copy of `text` with the characters at the recorded skip positions
// removed.
//
// skipPositions holds character indices (numchars advances once per UTF-8
// character of the pattern), so the text is walked by character rather than
// by byte: a multi-byte character is kept or dropped as a whole.  For pure
// ASCII input the result is the same as byte-wise filtering.
string RegexRule::filterSkips(string text)
{
  string response = "";
  response.reserve(text.size());

  int char_index = -1;   // index of the character the current byte belongs to
  bool skip = false;     // whether that character is being dropped

  for (size_t i = 0; i < text.size(); i++)
  {
    const unsigned char byte = static_cast<unsigned char>(text[i]);

    // UTF-8 continuation bytes have the form 10xxxxxx; any other byte starts
    // a new character.
    const bool starts_character = (byte & 0xC0) != 0x80;
    if (starts_character)
    {
      char_index++;
      skip = false;
      for (size_t j = 0; j < skipPositions.size(); j++)
      {
        if (skipPositions[j] == char_index)
        {
          skip = true;
          break;
        }
      }
    }

    if (!skip)
      response += text[i];
  }

  return response;
}
} }

View File

@@ -1,21 +1,52 @@
/* /*
* File: regexrule.h * Copyright (c) 2015 New Designs Unlimited, LLC
* Author: mhill * Opensource Automated License Plate Recognition [http://www.openalpr.com]
* *
* Created on April 28, 2015, 6:28 AM * This file is part of OpenAlpr.
*/ *
* OpenAlpr is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License
* version 3 as published by the Free Software Foundation
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPENALPR_REGEXRULE_H #ifndef OPENALPR_REGEXRULE_H
#define OPENALPR_REGEXRULE_H #define OPENALPR_REGEXRULE_H
#include <iostream>
#include <string>
#include <cstring>
#include <vector>
#include "support/regex/oniguruma.h"
#include "support/utf8.h"
namespace alpr namespace alpr
{ {
class RegexRule { class RegexRule
{
public: public:
RegexRule(); RegexRule(std::string region, std::string pattern);
virtual ~RegexRule(); virtual ~RegexRule();
private:
bool match(std::string text);
std::string filterSkips(std::string text);
private:
bool valid;
int numchars;
regex_t* onig_regex;
std::string original;
std::string regex;
std::string region;
std::vector<int> skipPositions;
}; };
} }

View File

@@ -5,10 +5,32 @@ set(support_source_files
timing.cpp timing.cpp
tinythread.cpp tinythread.cpp
platform.cpp platform.cpp
utf8.cpp
) )
# Regex sources under regex/ are compiled directly into the support library
# instead of being built through a separate subdirectory.
set(regex_source_files
  regex/regsyntax.c
  regex/regposerr.c
  regex/regcomp.c
  regex/reggnu.c
  regex/regerror.c
  regex/regext.c
  regex/regversion.c
  regex/regparse.c
  regex/regenc.c
  regex/st.c
  regex/regposix.c
  regex/regexec.c
  regex/regtrav.c
  regex/ascii.c
  regex/unicode.c
  regex/utf8.c
)

# The target is defined once, from both source lists.
add_library(support STATIC
  ${support_source_files}
  ${regex_source_files}
)
SET_TARGET_PROPERTIES( support PROPERTIES COMPILE_FLAGS -fPIC) SET_TARGET_PROPERTIES( support PROPERTIES COMPILE_FLAGS -fPIC)