Last active
December 28, 2015 08:08
-
-
Save richardkundl/7469011 to your computer and use it in GitHub Desktop.
Computate Levenshtein distance, not a normal returns format. It returns the percentage difference: 0 == perfect match; 100 == totaly different
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary> | |
/// Computate Levenshtein distance, not a normal returns format. | |
/// It returns the percentage difference. | |
/// </summary> | |
/// <param name="expected"> expected word </param> | |
/// <param name="actual"> actual word </param> | |
/// <returns> Value between 0 - 100 | |
/// 0==perfect match 100==totaly different </returns> | |
public static byte GetLevenshteinDistance(string expected, string actual) | |
{ | |
var rowLen = (byte)expected.Length; | |
var colLen = (byte)actual.Length; | |
int rowIdx; | |
int colIdx; | |
if (Math.Max(expected.Length, actual.Length) > Math.Pow(2, 31)) | |
{ | |
throw new Exception("\nMaximum string length in Levenshtein.iLD is " + Math.Pow(2, 31) + ".\nYours is " + Math.Max(expected.Length, actual.Length) + "."); | |
} | |
if (rowLen == 0) | |
{ | |
return colLen; | |
} | |
if (colLen == 0) | |
{ | |
return rowLen; | |
} | |
var v0 = new int[rowLen + 1]; | |
var v1 = new int[rowLen + 1]; | |
for (rowIdx = 1; rowIdx <= rowLen; rowIdx++) | |
{ | |
v0[rowIdx] = rowIdx; | |
} | |
for (colIdx = 1; colIdx <= colLen; colIdx++) | |
{ | |
v1[0] = colIdx; | |
var colJ = actual[colIdx - 1]; | |
for (rowIdx = 1; rowIdx <= rowLen; rowIdx++) | |
{ | |
var rowI = expected[rowIdx - 1]; | |
var cost = rowI == colJ ? 0 : 1; | |
int mMin = v0[rowIdx] + 1; | |
int b = v1[rowIdx - 1] + 1; | |
int c = v0[rowIdx - 1] + cost; | |
if (b < mMin) | |
{ | |
mMin = b; | |
} | |
if (c < mMin) | |
{ | |
mMin = c; | |
} | |
v1[rowIdx] = mMin; | |
} | |
var vTmp = v0; | |
v0 = v1; | |
v1 = vTmp; | |
} | |
var max = Math.Max(rowLen, colLen); | |
return (byte)((100 * v0[rowLen]) / max); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment