In this article, I will show you how to implement edit distance algorithm in C#. We also see how to print the result.
The edit distance of two strings, s1 and s2, is defined as the minimum number of point mutations required to change s1 into s2, where a point mutation is one of:
- Replace a letter,
- Insert a letter, or
- Delete a letter
Let’s consider the two wordscolor
andcolour
if you apply the above logic you will find that edit distance is 1 because you have to insert one characteru
to make color to colour.
void Main()
{
EditDistanceAlgo.LevenshteinDistance("color", "colour");
}
public class EditDistanceAlgo
{
private static int[,] m;
private static char diagonalCharacter;
public static int LevenshteinDistance(string first, string second)
{
int[,] characterMap = new int[first.Length + 1, second.Length + 1];
for (int i = 0; i <= first.Length; i++)
characterMap[i, 0] = i;
for (int j = 0; j <= second.Length; j++)
characterMap[0, j] = j;
for (int j = 1; j <= second.Length; j++)
for (int i = 1; i <= first.Length; i++)
if (first[i - 1] == second[j - 1])
characterMap[i, j] = characterMap[i - 1, j - 1]; //no operation
else
characterMap[i, j] = Math.Min(Math.Min(
characterMap[i - 1, j] + 1, //a deletion
characterMap[i, j - 1] + 1), //an insertion
characterMap[i - 1, j - 1] + 1 //a substitution
);
PrintPath("", "", "", characterMap, first.Length, second.Length, first, second);
return characterMap[first.Length, second.Length];
}
private static void PrintPath(string row1, string row2, string row3, int[,] characterMap, int i, int j, string first, string second)
{
string result = "";
if (i > 0 && j > 0)
{
var diag = characterMap[i - 1, j - 1];
diagonalCharacter = '|';
if (first[i - 1] != second[j - 1])
{
diag++; diagonalCharacter = '\t';
}
if (characterMap[i, j] == diag)
PrintPath(first[i - 1] + row1, diagonalCharacter + row2, second[j - 1] + row3, characterMap, i - 1, j - 1, first, second); // change or match
else if (characterMap[i, j] == characterMap[i - 1, j] - 0 + 1) // delete
PrintPath(first[i - 1] + row1, ' ' + row2, '-' + row3, characterMap, i - 1, j, first, second);
else
PrintPath('-' + row1, ' ' + row2, second[j - 1] + row3, characterMap, i, j - 1, first, second); // insertion
}
else if (i > 0)
PrintPath(first[i - 1] + row1, ' ' + row2, '-' + row3, characterMap, i - 1, j, first, second);
else if (j > 0)
PrintPath('-' + row1, ' ' + row2, second[j - 1] + row3, characterMap, i, j - 1, first, second);
else // i==0 and j==0
result += row1 + '\n' + row2 + '\n' + row3 + '\n';
Console.WriteLine(result);
}
}