Created
September 19, 2017 12:44
-
-
Save chipbell4/7b74332f6cf52d69ecc74cd1ef50dc1d to your computer and use it in GitHub Desktop.
A Knuth-Morris-Pratt Algorithm example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_table(W): | |
# initialize our table | |
T = [-1 for element in W] | |
# the current position we're calculating a table value | |
pos = 1 | |
# an extra index for referencing the candidate location that a mismatch would start over at. | |
cnd = 0 | |
while pos < len(W): | |
if W[pos] == W[cnd]: | |
# there was a match. Point this position to whatever the candidate position's table value has (since | |
# only going back to cnd directly would just cause another mismatch) | |
T[pos] = T[cnd] | |
else: | |
# There was a mismatch. Make T[pos] point back to cnd, so that if a mismatch occurs here searches will | |
# continue at cnd again | |
T[pos] = cnd | |
# now, roll cnd backwards through the table until we hit a match again OR we can't go backwards anymore | |
while cnd >= 0 and W[pos] <> W[cnd]: | |
cnd = T[cnd] | |
# now that we're either matching or cnd is back at the beginning, move forward and continue building the table | |
pos += 1 | |
cnd += 1 | |
return T | |
# finds the first index of W in S | |
def kmp(S, W): | |
T = build_table(W) | |
m = 0 # the index of the match, in S | |
i = 0 # the index we're at in W | |
while m + i < len(S): | |
if W[i] == S[m + i]: | |
# if we've found a match, move our "check index" up. If we've run out of characters, we've found a match | |
i += 1 | |
if i == len(W): | |
return m | |
else: | |
# otherwise, there was a mismatch. Use our table to "shift" W forward by the correct amount so we never | |
# have to double check a value in S (m + i is never the same value twice). The shift happens because we | |
# changed m. However, we have to reset i because we're starting over checking W. | |
m = m + i - T[i] | |
if T[i] > -1: | |
i = T[i] | |
else: | |
i = 0 | |
# no match found | |
return -1 | |
S = 'abcabcabcdf' | |
W = 'abcd' | |
print(kmp(S, W)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment