mirror of
https://github.com/kerberos-io/openalpr-base.git
synced 2025-10-06 04:06:49 +08:00
Added support for oniguruma and utf8
This commit is contained in:
@@ -457,80 +457,4 @@ namespace alpr
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexRule::~RegexRule()
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
RegexRule::RegexRule(string region, string pattern)
|
|
||||||
{
|
|
||||||
this->original = pattern;
|
|
||||||
this->region = region;
|
|
||||||
|
|
||||||
numchars = 0;
|
|
||||||
for (int i = 0; i < pattern.size(); i++)
|
|
||||||
{
|
|
||||||
if (pattern.at(i) == '[')
|
|
||||||
{
|
|
||||||
while (pattern.at(i) != ']' )
|
|
||||||
{
|
|
||||||
this->regex = this->regex + pattern.at(i);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
this->regex = this->regex + ']';
|
|
||||||
}
|
|
||||||
else if (pattern.at(i) == '?')
|
|
||||||
{
|
|
||||||
this->regex = this->regex + '.';
|
|
||||||
this->skipPositions.push_back(numchars);
|
|
||||||
}
|
|
||||||
else if (pattern.at(i) == '@')
|
|
||||||
{
|
|
||||||
this->regex = this->regex + "\\a";
|
|
||||||
}
|
|
||||||
else if (pattern.at(i) == '#')
|
|
||||||
{
|
|
||||||
this->regex = this->regex + "\\d";
|
|
||||||
}
|
|
||||||
|
|
||||||
numchars++;
|
|
||||||
}
|
|
||||||
|
|
||||||
trexp.Compile(this->regex.c_str());
|
|
||||||
|
|
||||||
//cout << "AA " << this->region << ": " << original << " regex: " << regex << endl;
|
|
||||||
//for (int z = 0; z < this->skipPositions.size(); z++)
|
|
||||||
// cout << "AA Skip position: " << skipPositions[z] << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool RegexRule::match(string text)
|
|
||||||
{
|
|
||||||
if (text.length() != numchars)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return trexp.Match(text.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
string RegexRule::filterSkips(string text)
|
|
||||||
{
|
|
||||||
string response = "";
|
|
||||||
for (int i = 0; i < text.size(); i++)
|
|
||||||
{
|
|
||||||
bool skip = false;
|
|
||||||
for (int j = 0; j < skipPositions.size(); j++)
|
|
||||||
{
|
|
||||||
if (skipPositions[j] == i)
|
|
||||||
{
|
|
||||||
skip = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (skip == false)
|
|
||||||
response = response + text[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
return response;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
@@ -20,7 +20,7 @@
|
|||||||
#ifndef OPENALPR_POSTPROCESS_H
|
#ifndef OPENALPR_POSTPROCESS_H
|
||||||
#define OPENALPR_POSTPROCESS_H
|
#define OPENALPR_POSTPROCESS_H
|
||||||
|
|
||||||
#include "support/regex/oniguruma.h"
|
#include "regexrule.h"
|
||||||
#include "constants.h"
|
#include "constants.h"
|
||||||
#include "utility.h"
|
#include "utility.h"
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
@@ -53,22 +53,6 @@ namespace alpr
|
|||||||
bool wordCompare( const PPResult &left, const PPResult &right );
|
bool wordCompare( const PPResult &left, const PPResult &right );
|
||||||
bool letterCompare( const Letter &left, const Letter &right );
|
bool letterCompare( const Letter &left, const Letter &right );
|
||||||
|
|
||||||
class RegexRule
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
RegexRule(std::string region, std::string pattern);
|
|
||||||
|
|
||||||
bool match(std::string text);
|
|
||||||
std::string filterSkips(std::string text);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int numchars;
|
|
||||||
TRexpp trexp;
|
|
||||||
std::string original;
|
|
||||||
std::string regex;
|
|
||||||
std::string region;
|
|
||||||
std::vector<int> skipPositions;
|
|
||||||
};
|
|
||||||
|
|
||||||
class PostProcess
|
class PostProcess
|
||||||
{
|
{
|
||||||
|
@@ -1,20 +1,169 @@
|
|||||||
/*
|
/*
|
||||||
* File: regexrule.cpp
|
* Copyright (c) 2015 New Designs Unlimited, LLC
|
||||||
* Author: mhill
|
* Opensource Automated License Plate Recognition [http://www.openalpr.com]
|
||||||
*
|
*
|
||||||
* Created on April 28, 2015, 6:28 AM
|
* This file is part of OpenAlpr.
|
||||||
|
*
|
||||||
|
* OpenAlpr is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License
|
||||||
|
* version 3 as published by the Free Software Foundation
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "regexrule.h"
|
#include "regexrule.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
namespace alpr
|
namespace alpr
|
||||||
{
|
{
|
||||||
|
|
||||||
RegexRule::RegexRule() {
|
/*
 * Builds a rule from a plate pattern and compiles it with Oniguruma.
 *
 * The pattern is walked one UTF-8 code point at a time and translated into
 * this->regex as follows:
 *   [ ... ]   copied through verbatim; the whole class counts as one character
 *   \         copied through; NOT counted as a character
 *   ?         becomes '.', and its character index is stored in skipPositions
 *   @         becomes \p{Alpha}
 *   #         becomes \p{Digit}
 *   * or +    rejected with an error message (nothing is appended)
 *   other     copied through unchanged
 *
 * numchars ends up holding the number of characters a matching text must have.
 * this->valid is true only when the pattern was well-formed UTF-8 and
 * onig_new() accepted the translated expression.
 *
 * region  - stored as-is in this->region
 * pattern - stored as-is in this->original and translated as described above
 */
RegexRule::RegexRule(string region, string pattern)
{
  this->original = pattern;
  this->region = region;

  // Give every member that the destructor and match() read a defined value
  // BEFORE the first early return; otherwise a rejected pattern leaves
  // onig_regex pointing at garbage that the destructor would later free.
  this->valid = false;
  this->numchars = 0;
  this->onig_regex = NULL;

  string::iterator end_it = utf8::find_invalid(pattern.begin(), pattern.end());
  if (end_it != pattern.end()) {
    cerr << "Invalid UTF-8 encoding detected " << endl;
    return;
  }

  string::iterator utf_iterator = pattern.begin();
  while (utf_iterator < pattern.end())
  {
    int cp = utf8::next(utf_iterator, pattern.end());

    string utf_character = utf8chr(cp);

    if (utf_character == "[")
    {
      this->regex = this->regex + "[";

      // Copy the character class through untouched, up to and including ']'
      while (utf_character != "]" )
      {
        if (utf_iterator >= pattern.end())
          break; // Invalid regex, don't bother processing
        int class_cp = utf8::next(utf_iterator, pattern.end());

        utf_character = utf8chr(class_cp);
        this->regex = this->regex + utf_character;
      }

    }
    else if (utf_character == "\\")
    {
      // Don't add "\" characters to our character count
      this->regex = this->regex + utf_character;
      continue;
    }
    else if (utf_character == "?")
    {
      this->regex = this->regex + '.';
      this->skipPositions.push_back(numchars);
    }
    else if (utf_character == "@")
    {
      this->regex = this->regex + "\\p{Alpha}";
    }
    else if (utf_character == "#")
    {
      this->regex = this->regex + "\\p{Digit}";
    }
    else if ((utf_character == "*") || (utf_character == "+"))
    {
      cerr << "Regex with wildcards (* or +) not supported" << endl;
    }
    else
    {
      this->regex = this->regex + utf_character;
    }

    numchars++;
  }

  UChar* cstr_pattern = (UChar* )this->regex.c_str();
  OnigErrorInfo einfo;

  //cout << "Pattern: " << cstr_pattern << endl;
  int r = onig_new(&onig_regex, cstr_pattern, cstr_pattern + strlen((char* )cstr_pattern),
                   ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);

  if (r != ONIG_NORMAL) {
    //char s[ONIG_MAX_ERROR_MESSAGE_LEN];
    //onig_error_code_to_str(s, r, &einfo);
    cerr << "Unable to load regex: " << pattern << endl;
  }
  else
  {
    this->valid = true;
  }
}
|
||||||
|
|
||||||
|
|
||||||
RegexRule::~RegexRule() {
|
/*
 * Releases the compiled Oniguruma expression owned by this rule.
 *
 * Only a rule whose constructor set `valid` owns a compiled expression; when
 * construction stopped early (invalid UTF-8) onig_regex was never assigned,
 * so it must not be handed to onig_free().
 *
 * onig_end() is deliberately NOT called here: it shuts down the Oniguruma
 * library as a whole, which would invalidate the compiled expressions of
 * every other RegexRule that is still alive.
 */
RegexRule::~RegexRule()
{
  if (this->valid)
    onig_free(onig_regex);
}
|
||||||
|
|
||||||
|
/*
 * Tests whether `text` satisfies this rule.
 *
 * Returns false when the rule failed to compile, when `text` is not
 * well-formed UTF-8 (an error is also written to cerr), or when its character
 * count differs from numchars. Otherwise the compiled expression is matched
 * at the start of `text`, and the result is true only if the reported match
 * length equals text.length().
 */
bool RegexRule::match(string text)
{
  if (!this->valid)
    return false;

  if (utf8::find_invalid(text.begin(), text.end()) != text.end())
  {
    cerr << "Invalid UTF-8 encoding detected " << endl;
    return false;
  }

  // Compare character counts (not byte counts) against the pattern
  int char_count = utf8::distance(text.begin(), text.end());
  if (char_count != numchars)
    return false;

  UChar* subject = (UChar* )text.c_str();
  unsigned char* subject_end = subject + strlen((char* )subject);
  unsigned char* match_at = subject;

  OnigRegion* capture_region = onig_region_new();
  int matched_bytes = onig_match(onig_regex, subject, subject_end, match_at,
                                 capture_region, ONIG_OPTION_NONE);
  onig_region_free(capture_region, 1);

  // onig_match reports the matched length; accept only a whole-string match
  return matched_bytes == text.length();
}
|
||||||
|
|
||||||
|
/*
 * Returns `text` with the characters at the rule's skip positions removed.
 *
 * skipPositions holds CHARACTER indices (the constructor counts UTF-8 code
 * points, not bytes), so the text is walked one code point at a time and a
 * skipped character is dropped with all of its bytes. Indexing by byte, as a
 * plain loop over text[i] does, removes the wrong bytes as soon as the text
 * contains a multi-byte character.
 *
 * If `text` is not well-formed UTF-8, each byte is treated as one character so
 * that the function still returns a result instead of failing while decoding.
 */
string RegexRule::filterSkips(string text)
{
  const bool well_formed =
      utf8::find_invalid(text.begin(), text.end()) == text.end();

  string response = "";
  string::iterator cursor = text.begin();
  int char_index = 0;

  while (cursor != text.end())
  {
    // Advance `cursor` past exactly one character, remembering where it began
    string::iterator char_start = cursor;
    if (well_formed)
      utf8::next(cursor, text.end());
    else
      ++cursor;

    bool skip = false;
    for (size_t j = 0; j < skipPositions.size(); j++)
    {
      if (skipPositions[j] == char_index)
      {
        skip = true;
        break;
      }
    }

    if (!skip)
      response.append(char_start, cursor);

    char_index++;
  }

  return response;
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -1,21 +1,52 @@
|
|||||||
/*
|
/*
|
||||||
* File: regexrule.h
|
* Copyright (c) 2015 New Designs Unlimited, LLC
|
||||||
* Author: mhill
|
* Opensource Automated License Plate Recognition [http://www.openalpr.com]
|
||||||
*
|
*
|
||||||
* Created on April 28, 2015, 6:28 AM
|
* This file is part of OpenAlpr.
|
||||||
|
*
|
||||||
|
* OpenAlpr is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License
|
||||||
|
* version 3 as published by the Free Software Foundation
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef OPENALPR_REGEXRULE_H
|
#ifndef OPENALPR_REGEXRULE_H
|
||||||
#define OPENALPR_REGEXRULE_H
|
#define OPENALPR_REGEXRULE_H
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string>
|
||||||
|
#include <cstring>
|
||||||
|
#include <vector>
|
||||||
|
#include "support/regex/oniguruma.h"
|
||||||
|
#include "support/utf8.h"
|
||||||
|
|
||||||
namespace alpr
|
namespace alpr
|
||||||
{
|
{
|
||||||
class RegexRule {
|
class RegexRule
|
||||||
|
{
|
||||||
public:
|
public:
|
||||||
RegexRule();
|
RegexRule(std::string region, std::string pattern);
|
||||||
virtual ~RegexRule();
|
virtual ~RegexRule();
|
||||||
private:
|
|
||||||
|
|
||||||
|
bool match(std::string text);
|
||||||
|
std::string filterSkips(std::string text);
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool valid;
|
||||||
|
|
||||||
|
int numchars;
|
||||||
|
regex_t* onig_regex;
|
||||||
|
std::string original;
|
||||||
|
std::string regex;
|
||||||
|
std::string region;
|
||||||
|
std::vector<int> skipPositions;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -5,10 +5,32 @@ set(support_source_files
|
|||||||
timing.cpp
|
timing.cpp
|
||||||
tinythread.cpp
|
tinythread.cpp
|
||||||
platform.cpp
|
platform.cpp
|
||||||
|
utf8.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
add_subdirectory(regex)
|
set(regex_source_files
|
||||||
|
regex/regsyntax.c
|
||||||
|
regex/regposerr.c
|
||||||
|
regex/regcomp.c
|
||||||
|
regex/reggnu.c
|
||||||
|
regex/regerror.c
|
||||||
|
regex/regext.c
|
||||||
|
regex/regversion.c
|
||||||
|
regex/regparse.c
|
||||||
|
regex/regenc.c
|
||||||
|
regex/st.c
|
||||||
|
regex/regposix.c
|
||||||
|
regex/regexec.c
|
||||||
|
regex/regtrav.c
|
||||||
|
regex/ascii.c
|
||||||
|
regex/unicode.c
|
||||||
|
regex/utf8.c
|
||||||
|
)
|
||||||
|
|
||||||
add_library(support STATIC ${support_source_files})
|
|
||||||
|
add_library(support STATIC
|
||||||
|
${support_source_files}
|
||||||
|
${regex_source_files}
|
||||||
|
)
|
||||||
|
|
||||||
SET_TARGET_PROPERTIES( support PROPERTIES COMPILE_FLAGS -fPIC)
|
SET_TARGET_PROPERTIES( support PROPERTIES COMPILE_FLAGS -fPIC)
|
Reference in New Issue
Block a user