mirror of
https://github.com/kerberos-io/openalpr-base.git
synced 2025-10-06 23:02:45 +08:00
Added a function for efficient Levenshtein distance computation, plus a unit test
This commit is contained in:
@@ -264,6 +264,125 @@ namespace alpr
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Computes the Levenshtein (edit) distance between two strings, capped at a cutoff.
// http://en.wikipedia.org/wiki/Levenshtein_distance
//
// s1, s2 : the strings to compare (compared byte by byte).
// max    : the cutoff. As soon as the distance is known to be max or more,
//          the search stops and max is returned.
//
// Returns min(distance, max). A non-positive max is returned unchanged.
//
// Only a diagonal band of the dynamic-programming table (cells with
// |i - j| below the cutoff) is evaluated, and only two rows are kept in memory.
int levenshteinDistance (const std::string &s1, const std::string &s2, int max)
{
  // With a non-positive cutoff nothing can be "below" it; the answer is the
  // cutoff itself. Returning early also keeps the arithmetic below overflow-free.
  if (max <= 0)
    return max;

  const int len1 = static_cast<int>(s1.length());
  const int len2 = static_cast<int>(s2.length());

  // The distance can never exceed the longer string's length, so a larger
  // cutoff is equivalent to that length. Clamping keeps the band bounds small.
  const int longest = (len1 > len2) ? len1 : len2;
  const int cutoff = (max > longest) ? longest : max;

  // Largest distance that is still strictly below the cutoff.
  const int limit = cutoff - 1;

  // previous = row i-1 of the table, current = row i (row i describes the
  // first i characters of s1 against every prefix of s2).
  std::vector<int> previous(len2 + 1);
  std::vector<int> current(len2 + 1);

  // Row 0: turning the empty prefix of s1 into s2[0..j) takes j insertions.
  for (int j = 0; j <= len2; j++)
    previous[j] = j;

  for (int i = 1; i <= len1; i++)
  {
    const char c1 = s1[i-1];

    // First and last cell of this row that lie inside the band.
    const int min_j = (i > limit) ? i - limit : 1;
    const int max_j = (len2 - i > limit) ? limit + i : len2;

    // Cell 0: deleting the first i characters of s1.
    current[0] = i;

    // Smallest value in this row. Seeding it with cell 0 keeps the early exit
    // below correct when s2 is empty and the inner loop never runs.
    int row_min = i;

    for (int j = 1; j <= len2; j++)
    {
      if (j < min_j || j > max_j)
      {
        // Outside the band the true value is at least limit + 1.
        current[j] = limit + 1;
      }
      else if (c1 == s2[j-1])
      {
        // Matching characters: carry the diagonal value over at no cost.
        current[j] = previous[j-1];
      }
      else
      {
        // Cheapest of deletion, insertion and substitution, plus one edit.
        int best = previous[j];
        if (current[j-1] < best)
          best = current[j-1];
        if (previous[j-1] < best)
          best = previous[j-1];
        current[j] = best + 1;
      }

      if (current[j] < row_min)
        row_min = current[j];
    }

    // Values never decrease along any path to later rows, so once a whole row
    // is above the limit the final distance must reach the cutoff.
    if (row_min > limit)
      return limit + 1;

    previous.swap(current);
  }

  // After the final swap "previous" holds the last computed row.
  const int distance = previous[len2];
  return (distance > limit + 1) ? limit + 1 : distance;
}
|
||||
|
||||
|
||||
LineSegment::LineSegment()
|
||||
{
|
||||
init(0, 0, 0, 0);
|
||||
|
@@ -101,7 +101,7 @@ namespace alpr
|
||||
|
||||
cv::Mat addLabel(cv::Mat input, std::string label);
|
||||
|
||||
|
||||
// Computes the Levenshtein (edit) distance between s1 and s2.
// max is the cutoff: the search stops early once the distance is known to
// reach it, and the returned value never exceeds max.
int levenshteinDistance (const std::string &s1, const std::string &s2, int max);
|
||||
std::string toString(int value);
|
||||
std::string toString(unsigned int value);
|
||||
std::string toString(float value);
|
||||
|
@@ -36,4 +36,26 @@ TEST_CASE( "LineSegment Test", "[2d primitives]" ) {
|
||||
REQUIRE( median(testarray1, 6) == 3 );
|
||||
REQUIRE( median(testarray2, 6) == 1 );
|
||||
REQUIRE( median(testarray3, 0) == 0 );
|
||||
}
|
||||
|
||||
// Exercises levenshteinDistance(): cutoff handling, identical inputs,
// single-character edits and empty operands.
TEST_CASE( "Test Levenshtein Distance", "[levenshtein]" ) {

  // The result is capped at the max cutoff
  REQUIRE( levenshteinDistance("asdf", "bbbb", 10) == 4 );
  REQUIRE( levenshteinDistance("asdf", "bbbb", 4) == 4 );
  REQUIRE( levenshteinDistance("asdf", "bbbb", 3) == 3 );
  REQUIRE( levenshteinDistance("asdf", "bbbb", 2) == 2 );
  REQUIRE( levenshteinDistance("asdf", "bbbb", 1) == 1 );
  REQUIRE( levenshteinDistance("asdf", "bbbb", 0) == 0 );

  // Identical inputs are at distance zero
  REQUIRE( levenshteinDistance("ABC123", "ABC123", 10) == 0 );
  REQUIRE( levenshteinDistance("", "", 5) == 0 );

  // Single-character insertions and deletions
  REQUIRE( levenshteinDistance("P32RX", "PE32RX", 10) == 1 );
  REQUIRE( levenshteinDistance("P32RX", "PE32RX", 2) == 1 );
  REQUIRE( levenshteinDistance("ASDF11", "ASDF1", 10) == 1 );
  REQUIRE( levenshteinDistance("1ASDF1", "ASDF1", 10) == 1 );
  REQUIRE( levenshteinDistance("ASD", "ASDF1", 2) == 2 );

  // Single-character substitution
  REQUIRE( levenshteinDistance("11111", "11I11", 2) == 1 );

  // Empty operand on either side, and a distance that reaches the cutoff
  REQUIRE( levenshteinDistance("", "AAAA", 2) == 2 );
  REQUIRE( levenshteinDistance("AAAA", "", 2) == 2 );
  REQUIRE( levenshteinDistance("BA", "AAAA", 2) == 2 );
}
|
Reference in New Issue
Block a user