Created
December 1, 2012 21:55
-
-
Save whyrusleeping/4185399 to your computer and use it in GitHub Desktop.
fuzzy string comp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public const float SCALE = 1.5; | |
public double compStr(string a, string b) | |
{ | |
return (double)lcSubString2(a, b)/ Math.Pow((a.Length > b.Length ? a.Length : b.Length), SCALE); | |
} | |
public int lcSubString2(string _a, string _b) | |
{ | |
string max = ""; | |
int maxai = 0; | |
int maxbi = 0; | |
string substr = ""; | |
for (int i = 0; i < _a.Length; i++) | |
{ | |
for (int j = 0; j < _b.Length; j++) | |
{ | |
substr = string.Empty; | |
for (int k = 0; i + k < _a.Length && j + k < _b.Length; k++) | |
{ | |
if (_a[i + k] == ' ' || _b[j + k] == ' ') | |
break; | |
if (_a[i + k] == _b[j + k]) | |
{ | |
substr += _a[i + k]; | |
} | |
else | |
{ | |
break; | |
} | |
if (substr.Length > max.Length) | |
{ | |
max = substr; | |
maxai = i; | |
maxbi = j; | |
} | |
} | |
} | |
} | |
string string1_part1 = _a.Substring(0, _a.IndexOf(max)); | |
string string1_part2 = _a.Substring(_a.IndexOf(max) + max.Length, _a.Length - (_a.IndexOf(max) + max.Length)); | |
string minusa = string1_part1 + " " + string1_part2; | |
string1_part1 = _b.Substring(0, _b.IndexOf(max)); | |
string1_part2 = _b.Substring(_b.IndexOf(max) + max.Length, _b.Length - (_b.IndexOf(max) + max.Length)); | |
string minusb = string1_part1 + " " + string1_part2; | |
int retval = Math.Pow( max.Length, SCALE); | |
if (minusa.Length > 1 && minusb.Length > 1 && max.Length > 0) | |
retval += lcSubString2(minusa, minusb); | |
return retval; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Compare Strings | |
//Returns a percentage likelyhood that strings a and b are the same | |
double compStr(string a, string b) | |
{ | |
return (double)lcSubStr(a,b) / (a.length() > b.length() ? a.length() : b.length()); | |
} | |
//Longest Common Substrings | |
//returns the number of characters in all substrings that a and b share | |
int lcSubStr(string a, string b) | |
{ | |
string max = ""; | |
int maxai = 0; | |
int maxbi = 0; | |
string substr = ""; | |
for(int i = 0; i < a.length(); i++) | |
{ | |
for(int j = 0; j < b.length(); j++) | |
{ | |
substr = ""; | |
for(int k = 0; k + i < a.length() && k + j < b.length(); k++) | |
{ | |
if(a[i+k] == b[j+k]) | |
substr += a[i+k]; | |
else | |
break; | |
} | |
if(substr.length() > max.length()) | |
{ | |
max = substr; | |
maxai = i; | |
maxbi = j; | |
} | |
} | |
} | |
int retval = 0; | |
if(max.length() > 0) | |
{ | |
//the left portion of the string 'a' | |
string aleft = a.substr(0,maxai); | |
//the right portion of the string 'a' | |
string aright = a.substr(maxai + max.length()); | |
string bleft = b.substr(0,maxbi); | |
string bright = b.substr(maxbi + max.length()); | |
//if there are characters to compare on the left side, do so | |
if(aleft.length() > 0 && bleft.length() > 0) | |
retval += lcSubStr(aleft, bleft); | |
//same for the right side | |
if(aright.length() > 0 && bright.length() > 0) | |
retval += lcSubStr(aright, bright); | |
} | |
//return the total number of shared characters | |
return retval + max.length(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
double compStr(string a, string b) | |
{ | |
return (double)lcSubStr(a,b) / (a.length() > b.length() ? a.length() : b.length()); | |
} | |
int lcSubStr(string a, string b) | |
{ | |
string max = ""; | |
int maxai = 0; | |
int maxbi = 0; | |
string substr = ""; | |
for(int i = 0; i < a.length(); i++) | |
{ | |
for(int j = 0; j < b.length(); j++) | |
{ | |
substr = ""; | |
for(int k = 0; k + i < a.length() && k + j < b.length(); k++) | |
{ | |
if(a[i+k] == b[j+k]) | |
substr += a[i+k]; | |
else | |
break; | |
} | |
if(substr.length() > max.length()) | |
{ | |
max = substr; | |
maxai = i; | |
maxbi = j; | |
} | |
} | |
} | |
int retval = 0; | |
if(max.length() > 0) | |
{ | |
string aleft = a.substr(0,maxai); | |
string aright = a.substr(maxai + max.length()); | |
string bleft = b.substr(0,maxbi); | |
string bright = b.substr(maxbi + max.length()); | |
if(aleft.length() > 0 && bleft.length() > 0) | |
retval += lcSubStr(aleft, bleft); | |
if(aright.length() > 0 && bright.length() > 0) | |
retval += lcSubStr(aright, bright); | |
} | |
return retval + max.length(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment