In this article, I will show you how to implement edit distance( Levenshtein) algorithm in C#. We also see how to print the result.
The Levenshtein distance is a measure of the number of changes (insertions, deletions or substitutions) needed to change one string into another. The Levenshtein distance takes its name from Soviet mathematician Vladimir Levenshtein who developed it in 1966
Implementation of the Levenshtein distance is as follows:
- Counts and saves the cost of all edits
- For each edit, compare the edit and original strings and calculate the number of edits. If their costs are equal, then return their cost as a match. If they are not equal, choose that which took less cost to calculate.
- After all edits processed, find the minimum cost for each substring and assign this value to be its “edit-distance” for that substring.
The edit distance of two strings, s1 and s2, is defined as the minimum number of point mutations required to change s1 into s2, where a point mutation is one of:
- Replace a letter,
- Insert a letter, or
- Delete a letter
Let’s consider the two wordscolor
andcolour
if you apply the above logic you will find that edit distance is 1 because you have to insert one characteru
to make color to colour.
void Main()
{
EditDistanceAlgo.LevenshteinDistance("color", "colour");
}
public class EditDistanceAlgo
{
private static int[,] m;
private static char diagonalCharacter;
public static int LevenshteinDistance(string first, string second)
{
int[,] characterMap = new int[first.Length + 1, second.Length + 1];
for (int i = 0; i <= first.Length; i++)
characterMap[i, 0] = i;
for (int j = 0; j <= second.Length; j++)
characterMap[0, j] = j;
for (int j = 1; j <= second.Length; j++)
for (int i = 1; i <= first.Length; i++)
if (first[i - 1] == second[j - 1])
characterMap[i, j] = characterMap[i - 1, j - 1]; //no operation
else
characterMap[i, j] = Math.Min(Math.Min(
characterMap[i - 1, j] + 1, //a deletion
characterMap[i, j - 1] + 1), //an insertion
characterMap[i - 1, j - 1] + 1 //a substitution
);
PrintPath("", "", "", characterMap, first.Length, second.Length, first, second);
return characterMap[first.Length, second.Length];
}
private static void PrintPath(string row1, string row2, string row3, int[,] characterMap, int i, int j, string first, string second)
{
string result = "";
if (i > 0 && j > 0)
{
var diag = characterMap[i - 1, j - 1];
diagonalCharacter = '|';
if (first[i - 1] != second[j - 1])
{
diag++; diagonalCharacter = '\t';
}
if (characterMap[i, j] == diag)
PrintPath(first[i - 1] + row1, diagonalCharacter + row2, second[j - 1] + row3, characterMap, i - 1, j - 1, first, second); // change or match
else if (characterMap[i, j] == characterMap[i - 1, j] - 0 + 1) // delete
PrintPath(first[i - 1] + row1, ' ' + row2, '-' + row3, characterMap, i - 1, j, first, second);
else
PrintPath('-' + row1, ' ' + row2, second[j - 1] + row3, characterMap, i, j - 1, first, second); // insertion
}
else if (i > 0)
PrintPath(first[i - 1] + row1, ' ' + row2, '-' + row3, characterMap, i - 1, j, first, second);
else if (j > 0)
PrintPath('-' + row1, ' ' + row2, second[j - 1] + row3, characterMap, i, j - 1, first, second);
else // i==0 and j==0
result += row1 + '\n' + row2 + '\n' + row3 + '\n';
Console.WriteLine(result);
}
}