mirror of
https://github.com/kerberos-io/openalpr-base.git
synced 2025-10-07 04:50:54 +08:00
Added function for efficient levenshtein distance and unit test
This commit is contained in:
@@ -264,6 +264,125 @@ namespace alpr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Compares two strings and computes the edit distance between them
// http://en.wikipedia.org/wiki/Levenshtein_distance
//
// s1, s2 : the strings to compare (compared char by char).
// max    : the cutoff (i.e., max distance) where we stop trying to find the
//          distance.
//
// Returns the edit distance when it is smaller than max, otherwise max.
// A max of zero or less always yields 0.
int levenshteinDistance (const std::string &s1, const std::string &s2, int max)
{
  // Nothing can be reported below a non-positive cutoff.  This keeps the
  // historical answer for max == 0 and stops negative cutoffs from producing
  // negative "distances".
  if (max <= 0)
    return 0;

  const int len1 = static_cast<int>(s1.length());
  const int len2 = static_cast<int>(s2.length());
  const int longest = (len1 > len2) ? len1 : len2;

  // Against an empty string the distance is simply the other string's length.
  // Handling it here matters: with an empty s2 the main loop below would see
  // no cells at all and wrongly report the cutoff.
  if (len1 == 0 || len2 == 0)
    return (longest < max) ? longest : max;

  // The distance never exceeds the longer length, so a larger cutoff changes
  // nothing.  Clamping keeps "limit + i" below from overflowing when a caller
  // passes something like INT_MAX to mean "no cutoff".
  if (max > longest)
    max = longest;

  // Every value >= max is reported as max, so the table only has to tell
  // apart 0..limit; anything larger is stored as limit + 1 (== max).
  const int limit = max - 1;

  // Two rolling rows of the (len1 + 1) x (len2 + 1) table; the row for prefix
  // length i of s1 lives in matrix[i % 2].
  // NOTE(review): a runtime-sized array is a compiler extension in C++ and is
  // allocated on the stack, so very long s2 values can exhaust it.  Consider
  // switching to a heap-backed buffer once the file's includes are confirmed.
  int matrix[2][len2 + 1];

  // Row 0: turning the empty prefix of s1 into the first j chars of s2.
  for (int j = 0; j <= len2; j++)
    matrix[0][j] = j;

  for (int i = 1; i <= len1; i++)
  {
    const char c1 = s1[i - 1];
    const int next = i % 2;     // row being filled in
    const int prev = 1 - next;  // row filled in on the previous pass

    // Only cells within "limit" of the diagonal can hold a value <= limit.
    const int min_j = (i > limit) ? i - limit : 1;
    const int max_j = (len2 > limit + i) ? limit + i : len2;

    // Smallest value written to this row (cells 1..len2).
    int row_min = INT_MAX;

    matrix[next][0] = i;

    for (int j = 1; j <= len2; j++)
    {
      int cell;

      if (j < min_j || j > max_j)
      {
        // Outside the band: store the "too large" marker.
        cell = limit + 1;
      }
      else if (c1 == s2[j - 1])
      {
        // Matching characters cost nothing extra.
        cell = matrix[prev][j - 1];
      }
      else
      {
        // Cheapest of deletion, insertion and substitution.
        const int del = matrix[prev][j] + 1;
        const int insert = matrix[next][j - 1] + 1;
        const int substitute = matrix[prev][j - 1] + 1;

        cell = del;
        if (insert < cell)
          cell = insert;
        if (substitute < cell)
          cell = substitute;
      }

      matrix[next][j] = cell;

      if (cell < row_min)
        row_min = cell;
    }

    // Values never decrease from one row to the next, so once a whole row is
    // above the limit the final answer cannot be below the cutoff.
    if (row_min > limit)
      return max;
  }

  const int distance = matrix[len1 % 2][len2];
  return (distance > max) ? max : distance;
}
|
||||||
|
|
||||||
|
|
||||||
LineSegment::LineSegment()
|
LineSegment::LineSegment()
|
||||||
{
|
{
|
||||||
init(0, 0, 0, 0);
|
init(0, 0, 0, 0);
|
||||||
|
@@ -101,7 +101,7 @@ namespace alpr
|
|||||||
|
|
||||||
cv::Mat addLabel(cv::Mat input, std::string label);
|
cv::Mat addLabel(cv::Mat input, std::string label);
|
||||||
|
|
||||||
|
int levenshteinDistance (const std::string &s1, const std::string &s2, int max);
|
||||||
std::string toString(int value);
|
std::string toString(int value);
|
||||||
std::string toString(unsigned int value);
|
std::string toString(unsigned int value);
|
||||||
std::string toString(float value);
|
std::string toString(float value);
|
||||||
|
@@ -36,4 +36,26 @@ TEST_CASE( "LineSegment Test", "[2d primitives]" ) {
|
|||||||
REQUIRE( median(testarray1, 6) == 3 );
|
REQUIRE( median(testarray1, 6) == 3 );
|
||||||
REQUIRE( median(testarray2, 6) == 1 );
|
REQUIRE( median(testarray2, 6) == 1 );
|
||||||
REQUIRE( median(testarray3, 0) == 0 );
|
REQUIRE( median(testarray3, 0) == 0 );
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE( "Test Levenshtein Distance", "[levenshtein]" ) {

  // One row per check: the two inputs, the cutoff handed to
  // levenshteinDistance and the value the call is required to return.
  struct DistanceCase
  {
    const char* first;
    const char* second;
    int cutoff;
    int expected;
  };

  const DistanceCase cases[] = {
    // The result is capped at the cutoff
    { "asdf",   "bbbb",   10, 4 },
    { "asdf",   "bbbb",    4, 4 },
    { "asdf",   "bbbb",    3, 3 },
    { "asdf",   "bbbb",    2, 2 },
    { "asdf",   "bbbb",    1, 1 },
    { "asdf",   "bbbb",    0, 0 },

    // Single-character insertions, deletions and substitutions
    { "P32RX",  "PE32RX", 10, 1 },
    { "P32RX",  "PE32RX",  2, 1 },
    { "ASDF11", "ASDF1",  10, 1 },
    { "1ASDF1", "ASDF1",  10, 1 },
    { "ASD",    "ASDF1",   2, 2 },
    { "11111",  "11I11",   2, 1 },

    // Empty and very short first strings against a cutoff of 2
    { "",       "AAAA",    2, 2 },
    { "BA",     "AAAA",    2, 2 }
  };

  const int caseCount = sizeof(cases) / sizeof(cases[0]);

  // Rows are checked in the order listed; the first failing row aborts the test.
  for (int idx = 0; idx < caseCount; idx++)
  {
    const DistanceCase& current = cases[idx];
    REQUIRE( levenshteinDistance(current.first, current.second, current.cutoff) == current.expected );
  }
}
|
}
|
Reference in New Issue
Block a user