Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save dhruvpathak/46ffde6f5ac5c54ea2c64afec73ca828 to your computer and use it in GitHub Desktop.
Save dhruvpathak/46ffde6f5ac5c54ea2c64afec73ca828 to your computer and use it in GitHub Desktop.
String Similarity Performance Python Vs C Bindings
import timeit
# Source text that timeit executes once before each timing run. It imports the
# three libraries under test and defines one thin wrapper per metric, so every
# timed statement pays the same Python-level call overhead.
# The function bodies must be indented: timeit compiles this string as Python
# source and would raise IndentationError otherwise.
setup = '''
import difflib
import jellyfish
import Levenshtein


def difflib_result(first_str, second_str):
    return difflib.SequenceMatcher(None, first_str, second_str).ratio()


def jaro_result(first_str, second_str):
    return jellyfish.jaro_winkler(first_str, second_str)


def cdifflib_result(first_str, second_str):
    return Levenshtein.ratio(first_str, second_str)


def cjaro_result(first_str, second_str):
    return Levenshtein.jaro_winkler(first_str, second_str)
'''
# Time every wrapper on the same pair of strings. Each timer performs 5
# repeats of 10000 calls; the fastest repeat is printed next to its label.
call_args = "('The Shawshank Redemption','The Shaushank Redemtion')"
for label, func_name in (
    ('difflib', 'difflib_result'),
    ('c-difflib', 'cdifflib_result'),
    ('jaro_winkler', 'jaro_result'),
    ('c-jaro_winkler', 'cjaro_result'),
):
    timer = timeit.Timer(func_name + call_args, setup=setup)
    print(label, min(timer.repeat(5, 10000)))
# OUTPUT (seconds for 10000 calls, fastest of 5 repeats)
#difflib 0.55878395
#c-difflib 0.005077555000000178
#jaro_winkler 0.007728011000000201
#c-jaro_winkler 0.005517472999999828
import timeit
import difflib
import jellyfish
import Levenshtein
def difflib_result(first_str: str, second_str: str) -> float:
    """Return the ``difflib`` similarity ratio of two strings.

    Args:
        first_str: First string to compare.
        second_str: Second string to compare.

    Returns:
        The value of ``SequenceMatcher.ratio()`` for the two strings, with no
        ``isjunk`` function supplied (first argument ``None``).
    """
    return difflib.SequenceMatcher(None, first_str, second_str).ratio()
def jaro_result(first_str: str, second_str: str) -> float:
    """Return the Jaro-Winkler score of two strings as computed by jellyfish.

    Args:
        first_str: First string to compare.
        second_str: Second string to compare.

    Returns:
        Whatever ``jellyfish.jaro_winkler`` returns for the pair; the sample
        output in this file shows floats such as 0.8933333333333333.
    """
    # NOTE(review): newer jellyfish releases appear to expose this metric as
    # ``jaro_winkler_similarity`` -- confirm against the installed version.
    return jellyfish.jaro_winkler(first_str, second_str)
def cdifflib_result(first_str: str, second_str: str) -> float:
    """Return the similarity ratio of two strings as computed by Levenshtein.

    This is the counterpart that the script's output labels ``c-difflib``.

    Args:
        first_str: First string to compare.
        second_str: Second string to compare.

    Returns:
        Whatever ``Levenshtein.ratio`` returns for the pair; the sample output
        in this file shows it matching the difflib ratio for both test pairs.
    """
    return Levenshtein.ratio(first_str, second_str)
def cjaro_result(first_str: str, second_str: str) -> float:
    """Return the Jaro-Winkler score of two strings as computed by Levenshtein.

    This is the counterpart that the script's output labels
    ``c-jaro_winkler``.

    Args:
        first_str: First string to compare.
        second_str: Second string to compare.

    Returns:
        Whatever ``Levenshtein.jaro_winkler`` returns for the pair. The sample
        output in this file shows it can differ from the jellyfish score
        (0.9873... vs 0.9746... for the longer test pair).
    """
    return Levenshtein.jaro_winkler(first_str, second_str)
# Print the score from each of the four wrappers for every sample pair,
# then a separator line so the per-pair results are easy to tell apart.
for first_str, second_str in [
    ('The Shawshank Redemption', 'The Shaushank Redemtion'),
    ('Se7en', 'Seven'),
]:
    print('difflib', difflib_result(first_str, second_str))
    print('c-difflib', cdifflib_result(first_str, second_str))
    print('jaro_winkler', jaro_result(first_str, second_str))
    print('c-jaro_winkler', cjaro_result(first_str, second_str))
    print('\n-----------------------\n')
# OUTPUT
#difflib 0.9361702127659575
#c-difflib 0.9361702127659575
#jaro_winkler 0.9746376811594203
#c-jaro_winkler 0.9873188405797102
#
#-----------------------
#
#difflib 0.8
#c-difflib 0.8
#jaro_winkler 0.8933333333333333
#c-jaro_winkler 0.8933333333333333
#
#-----------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment