Added function for efficient Levenshtein distance and unit test

This commit is contained in:
Matt Hill
2014-12-07 14:53:11 -05:00
parent 7fee9b9fce
commit 1a76de69a7
3 changed files with 142 additions and 1 deletions

View File

@@ -264,6 +264,125 @@ namespace alpr
}
}
// Compares two strings and computes the edit distance between them
// http://en.wikipedia.org/wiki/Levenshtein_distance
//
// s1, s2: the strings to compare (compared byte by byte).
// max:    the cutoff (i.e., max distance) where we stop trying to find the
//         distance.
//
// Returns the Levenshtein distance between s1 and s2, capped at max; in other
// words min(distance, max).  For max <= 0 the cutoff is reached immediately
// and max itself is returned.
//
// Only two rows of the dynamic-programming table are kept, and only the cells
// within (max - 1) of the diagonal are actually computed; every other cell is
// known to be at least max and is simply filled with max.
int levenshteinDistance (const std::string &s1, const std::string &s2, int max)
{
  // Nothing can be below a non-positive cutoff.  Returning early also keeps
  // "max - 1" below from overflowing for extreme values.
  if (max <= 0) {
    return max;
  }

  const int len1 = static_cast<int>(s1.length());
  const int len2 = static_cast<int>(s2.length());

  // Largest distance that is still below the cutoff.
  const int limit = max - 1;

  // previous holds the row for the first (i - 1) characters of s1, current
  // the row being filled in for the first i characters.  Heap storage is used
  // so long inputs cannot exhaust the stack.
  std::vector<int> previous(len2 + 1);
  std::vector<int> current(len2 + 1);

  // Row 0: turning the empty prefix of s1 into the first j characters of s2
  // takes j insertions.
  for (int j = 0; j <= len2; j++) {
    previous[j] = j;
  }

  for (int i = 1; i <= len1; i++) {
    const char c1 = s1[i - 1];

    // First and last index of this row that lie inside the band around the
    // diagonal.  The upper bound is tested as "len2 - i > limit" rather than
    // "len2 > limit + i" so that a very large cutoff cannot overflow.
    const int min_j = (i > limit) ? i - limit : 1;
    const int max_j = (len2 - i > limit) ? limit + i : len2;

    // Cell 0: removing all i characters of the s1 prefix.
    current[0] = i;

    // Smallest value in this row.  It starts from cell 0 so that the early
    // exit below is also correct when s2 is empty.
    int row_min = i;

    for (int j = 1; j <= len2; j++) {
      if (j < min_j || j > max_j) {
        // Outside the band the true value is at least max.
        current[j] = max;
      }
      else if (c1 == s2[j - 1]) {
        // Matching characters cost nothing extra.
        current[j] = previous[j - 1];
      }
      else {
        // Cheapest of deletion, insertion and substitution, plus one edit.
        int best = previous[j];
        if (current[j - 1] < best) {
          best = current[j - 1];
        }
        if (previous[j - 1] < best) {
          best = previous[j - 1];
        }
        current[j] = best + 1;
      }

      if (current[j] < row_min) {
        row_min = current[j];
      }
    }

    if (row_min > limit) {
      // Every cell in this row has already reached the cutoff, and values can
      // only grow in later rows, so the result is the cutoff.
      return max;
    }

    previous.swap(current);
  }

  // After the final swap (or with an empty s1) the answer is in previous.
  const int distance = previous[len2];
  return (distance > max) ? max : distance;
}
LineSegment::LineSegment()
{
init(0, 0, 0, 0);

View File

@@ -101,7 +101,7 @@ namespace alpr
cv::Mat addLabel(cv::Mat input, std::string label);
// Computes the Levenshtein (edit) distance between s1 and s2.
// max is the cutoff (i.e., max distance) at which the search stops early;
// the returned value is capped at max.
int levenshteinDistance (const std::string &s1, const std::string &s2, int max);
std::string toString(int value);
std::string toString(unsigned int value);
std::string toString(float value);

View File

@@ -36,4 +36,26 @@ TEST_CASE( "LineSegment Test", "[2d primitives]" ) {
REQUIRE( median(testarray1, 6) == 3 );
REQUIRE( median(testarray2, 6) == 1 );
REQUIRE( median(testarray3, 0) == 0 );
}
TEST_CASE( "Test Levenshtein Distance", "[levenshtein]" ) {
  // One row per expectation: the two inputs, the cutoff handed to
  // levenshteinDistance, and the value it has to return.
  struct DistanceCase {
    const char* first;
    const char* second;
    int cutoff;
    int expected;
  };

  static const DistanceCase cases[] = {
    // The result is capped at the cutoff
    { "asdf",   "bbbb",   10, 4 },
    { "asdf",   "bbbb",   4,  4 },
    { "asdf",   "bbbb",   3,  3 },
    { "asdf",   "bbbb",   2,  2 },
    { "asdf",   "bbbb",   1,  1 },
    { "asdf",   "bbbb",   0,  0 },
    // Single insertions, deletions and substitutions
    { "P32RX",  "PE32RX", 10, 1 },
    { "P32RX",  "PE32RX", 2,  1 },
    { "ASDF11", "ASDF1",  10, 1 },
    { "1ASDF1", "ASDF1",  10, 1 },
    { "ASD",    "ASDF1",  2,  2 },
    { "11111",  "11I11",  2,  1 },
    { "",       "AAAA",   2,  2 },
    { "BA",     "AAAA",   2,  2 }
  };
  const int caseCount = static_cast<int>(sizeof(cases) / sizeof(cases[0]));

  for (int idx = 0; idx < caseCount; idx++) {
    const DistanceCase &current = cases[idx];
    // Identify the failing row, since every check shares one source line.
    INFO( "s1=\"" << current.first << "\" s2=\"" << current.second
          << "\" max=" << current.cutoff );
    REQUIRE( levenshteinDistance(current.first, current.second, current.cutoff) == current.expected );
  }
}