Last active
February 12, 2020 01:25
-
-
Save robertvunabandi/886b318c197125e9abb608e29a416b64 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is used to solve problem 1-2 of the problem set. | |
# ------------------------------------- # | |
# 6.009 LAB 6 Trie Implementation START # | |
# ------------------------------------- # | |
class Trie: | |
ALLOWED_TYPES = [str, tuple] | |
def __init__(self, type_=None): | |
self.value = None | |
self.children = {} | |
if type_ not in Trie.ALLOWED_TYPES and type_ is not None: | |
raise TypeError(f"Cannot initialize Trie with invalid type {str(type_)}") | |
self.type = type_ | |
def _raise_if_invalid_type(self, key): | |
if type(key) != self.type: | |
if self.type is None: | |
raise TypeError("Invalid type for this Trie. (Type is None)") | |
raise TypeError("Invalid type for this Trie") | |
def has_children(self): | |
return len(self.children.keys()) > 0 | |
def get_node(self, key, create=False): | |
self._raise_if_invalid_type(key) | |
if len(key) == 0: | |
return self | |
char, rest = key[:1], key[1:] | |
if char not in self.children: | |
if not create: | |
aslist = str(list(self)) | |
raise KeyError( | |
f"Character {char} from key {key} is not in this Trie ({aslist})" | |
) | |
self.children[char] = Trie() | |
self.children[char].type = self.type | |
node = self.children[char] | |
return node.get_node(rest, create) | |
def __getitem__(self, key): | |
if self.type is None: | |
raise KeyError("Cannot get from an empty Trie") | |
self._raise_if_invalid_type(key) | |
node = self.get_node(key) | |
if node.value is None: | |
raise KeyError("This Trie doesn't have a value") | |
return node.value | |
def __setitem__(self, key, value): | |
if type(key) not in Trie.ALLOWED_TYPES: | |
raise TypeError(f"The type {str(type(key))} is not allowed in Tries.") | |
if self.type is None: | |
self.type = type(key) | |
self._raise_if_invalid_type(key) | |
node = self.get_node(key, create=True) | |
node.value = value | |
def __delitem__(self, key): | |
self._raise_if_invalid_type(key) | |
try: | |
node = self.get_node(key) | |
except KeyError: | |
return | |
node.value = None | |
def __contains__(self, key): | |
try: | |
_ = self[key] | |
return True | |
except KeyError: | |
return False | |
except TypeError: | |
raise | |
def __iter__(self): | |
if self.value is not None: | |
yield (self.type(), self.value) | |
for prefix in self.children: | |
for key, value in self.children[prefix]: | |
yield (prefix + key, value) | |
# ----------------------------------- # | |
# 6.009 LAB 6 Trie Implementation END # | |
# ----------------------------------- # | |
ALPHABET = "abcdefghijklmnopqrstuvwxyz" | |
def load_words(length): | |
""" Load of words of length `length` and returns a Trie for them """ | |
# These words are coming from: | |
# https://github.com/dwyl/english-words/blob/master/words.txt | |
with open("words.txt", "r") as words_f: | |
lines = words_f.read().splitlines() | |
trie = Trie() | |
chrhex_map = char_to_hex_map() | |
for word in lines: | |
# we only care about alphanumeric words (i.e., those | |
# with characters in our alphabet). | |
if not word.isalpha(): | |
continue | |
if len(word) == length: | |
hexs = tuple(chrhex_map[c] for c in word.lower()) | |
trie[hexs] = True | |
return trie | |
def gen_char_hexs(): | |
# ord(x) converts strings `x` into a number representing the | |
# UTF-8 hex value. E.g., `ord("f") == 0x0066` returns `True` | |
return (ord(l) for l in (ALPHABET)) | |
def get_char_hexs(): | |
return list(gen_char_hexs()) | |
def char_to_hex_map(): | |
return dict(zip(ALPHABET, gen_char_hexs())) | |
def hex_to_char_map(): | |
return dict(zip(gen_char_hexs(), ALPHABET)) | |
def conv_hex_strs_to_int(hex_strings): | |
return [int(hex_s, 16) for hex_s in hex_strings] | |
def letter_pairs_with_xor_value(xor_value): | |
""" | |
Returns a generator for which as a list would be a list | |
of pair hex values such that for each pair (x, y): | |
- x ^ y = xor_value | |
- x and y are in get_char_hexs() | |
- x <= y | |
""" | |
for char1 in gen_char_hexs(): | |
for char2 in gen_char_hexs(): | |
if char1 > char2: | |
continue | |
if char1 ^ char2 == xor_value: | |
yield (char1, char2) | |
def run_problem_two_part_a(): | |
# first, grab the two cyphertexts and xor them together. | |
# this result will be placed inside m1xorm2 | |
cypher_1 = "d3 a4 0a 3e 63 c2 13 3c 41 17 4d 57 85 bb" | |
cypher_2 = "d1 a3 07 26 72 c3 04 20 50 04 5c 50 89 ac" | |
cypher_1_ints = conv_hex_strs_to_int(cypher_1.split(" ")) | |
cypher_2_ints = conv_hex_strs_to_int(cypher_2.split(" ")) | |
# now, we have m1 xor m2. This gives us the difference for | |
# for each pairs of letters. | |
m1xorm2 = [a ^ b for a, b in zip(cypher_1_ints, cypher_2_ints)] | |
# so, if they were encoded with the same key, then those differences | |
# should give us a list of possible pairs of characters based on how | |
# different they are. | |
print(m1xorm2) | |
trie = load_words(14) | |
r1, r2 = solve_dfs(m1xorm2, trie, trie) | |
if r1 is not None: | |
print("message one:", "".join(MAP_HEX_CHR[c] for c in r1)) | |
print("message two:", "".join(MAP_HEX_CHR[c] for c in r2)) | |
else: | |
print("Couldn't find any message one and two") | |
MAP_CHR_HEX = char_to_hex_map() | |
MAP_HEX_CHR = hex_to_char_map() | |
def solve_dfs(m1xorm2, trie_1, trie_2): | |
if len(m1xorm2) == 0: | |
assert trie_1.value is not None | |
assert trie_2.value is not None | |
return [], [] | |
if (not trie_1.has_children()) or (not trie_2.has_children()): | |
return None, None | |
xor_value, rest_m1xorm2 = m1xorm2[0], m1xorm2[1:] | |
for first_char_1, first_char_2 in letter_pairs_with_xor_value(xor_value): | |
fc1 = (first_char_1,) | |
fc2 = (first_char_2,) | |
# try the first order fc1, fc2 | |
if fc1 in trie_1.children and fc2 in trie_2.children: | |
fct_1 = trie_1.get_node(fc1) | |
fct_2 = trie_2.get_node(fc2) | |
assert fct_1 != trie_1 | |
assert fct_2 != trie_2 | |
rest_1, rest_2 = solve_dfs(rest_m1xorm2, fct_1, fct_2) | |
if (rest_1 is not None) and (rest_2 is not None): | |
return [fc1[0]] + rest_1, [fc2[0]] + rest_2 | |
# try the second order fc2, fc1 | |
# micro optimization if they are the same, just skip | |
if first_char_1 == first_char_2: | |
continue | |
fc1, fc2 = fc2, fc1 | |
if fc1 not in trie_1.children: | |
continue | |
if fc2 not in trie_2.children: | |
continue | |
fct_1 = trie_1.get_node(fc1) | |
fct_2 = trie_2.get_node(fc2) | |
rest_1, rest_2 = solve_dfs(rest_m1xorm2, fct_1, fct_2) | |
if (rest_1 is None) or (rest_2 is None): | |
continue | |
return [fc1[0]] + rest_1, [fc2[0]] + rest_2 | |
return None, None | |
def test_dfs_solve_1(): | |
_m1xorm2 = [0x0, 0x4, 0x7] | |
_trie = Trie() | |
_trie[0x63, 0x61, 0x74] = 1 | |
_trie[0x63, 0x65, 0x73] = 1 | |
r1, r2 = solve_dfs(_m1xorm2, _trie, _trie) | |
print(r1, r2) | |
if r1 is None: | |
raise ValueError("DFS Solve didn't work!") | |
ew1 = "cat" | |
ew2 = "ces" | |
w1 = "".join(MAP_HEX_CHR[c] for c in r1) | |
w2 = "".join(MAP_HEX_CHR[c] for c in r2) | |
result = {w1, w2} | |
if ew1 not in result: | |
raise ValueError(f"DFS Solve didn't work correctly! It got: {result}") | |
if ew2 not in result: | |
raise ValueError(f"DFS Solve didn't work correctly! It got: {result}") | |
print(f"DFS Solve test 2 Passed with {result}") | |
def test_dfs_solve_2(): | |
m1xorm2 = [0x2, 0x1A, 0x8] | |
trie = load_words(3) | |
r1, r2 = solve_dfs(m1xorm2, trie, trie) | |
if r1 is None: | |
raise ValueError("DFS Solve didn't work!") | |
ew1 = "xui" | |
ew2 = "zoa" | |
w1 = "".join(MAP_HEX_CHR[c] for c in r1) | |
w2 = "".join(MAP_HEX_CHR[c] for c in r2) | |
result = {w1, w2} | |
if ew1 not in result: | |
# fail only if m1xorm2 is wrong. | |
if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2): | |
raise ValueError(f"DFS Solve didn't work correctly! It got: {result}") | |
if ew2 not in result: | |
# fail only if m1xorm2 is wrong. | |
if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2): | |
raise ValueError(f"DFS Solve didn't work correctly! It got: {result}") | |
print(f"DFS Solve test 2 Passed with {result}") | |
def test_dfs_solve_3(): | |
m1xorm2 = [0xc, 0xd, 0xf, 0xd, 0x1b, 0x9, 0x1c] | |
trie = load_words(7) | |
r1, r2 = solve_dfs(m1xorm2, trie, trie) | |
if r1 is None: | |
raise ValueError("DFS Solve didn't work!") | |
ew1 = "allergy" | |
ew2 = "machine" | |
w1 = "".join(MAP_HEX_CHR[c] for c in r1) | |
w2 = "".join(MAP_HEX_CHR[c] for c in r2) | |
result = {w1, w2} | |
if ew1 not in result: | |
# fail only if m1xorm2 is wrong. | |
if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2): | |
raise ValueError(f"DFS Solve didn't work correctly! It got: {result}") | |
if ew2 not in result: | |
# fail only if m1xorm2 is wrong. | |
if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2): | |
raise ValueError(f"DFS Solve didn't work correctly! It got: {result}") | |
print(f"DFS Solve test 3 Passed with {result}") | |
if __name__ == "__main__": | |
test_dfs_solve_1() | |
test_dfs_solve_2() | |
test_dfs_solve_3() | |
run_problem_two_part_a() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment