robertvunabandi · February 12, 2020 01:25
diff --git a/otp-decrypt.py b/otp-decrypt.py
 # This is used to solve problem 1-2 of the problem set.
 # ------------------------------------- #
 # 6.009 LAB 6 Trie Implementation START #
 # ------------------------------------- #

 class Trie:
    ALLOWED_TYPES = [str, tuple]

    def __init__(self, type_=None):
        self.value = None
        self.children = {}
        if type_ not in Trie.ALLOWED_TYPES and type_ is not None:
            raise TypeError(f"Cannot initialize Trie with invalid type {str(type_)}")
        self.type = type_

    def _raise_if_invalid_type(self, key):
        if type(key) != self.type:
            if self.type is None:
                raise TypeError("Invalid type for this Trie. (Type is None)")
            raise TypeError("Invalid type for this Trie")

    def has_children(self):
        return len(self.children.keys()) > 0

    def get_node(self, key, create=False):
        self._raise_if_invalid_type(key)
        if len(key) == 0:
            return self
        char, rest = key[:1], key[1:]
        if char not in self.children:
            if not create:
                aslist = str(list(self))
                raise KeyError(
                    f"Character {char} from key {key} is not in this Trie ({aslist})"
                )
            self.children[char] = Trie()
            self.children[char].type = self.type
        node = self.children[char]
        return node.get_node(rest, create)

    def __getitem__(self, key):
        if self.type is None:
            raise KeyError("Cannot get from an empty Trie")
        self._raise_if_invalid_type(key)
        node = self.get_node(key)
        if node.value is None:
            raise KeyError("This Trie doesn't have a value")
        return node.value

    def __setitem__(self, key, value):
        if type(key) not in Trie.ALLOWED_TYPES:
            raise TypeError(f"The type {str(type(key))} is not allowed in Tries.")
        if self.type is None:
            self.type = type(key)
        self._raise_if_invalid_type(key)
        node = self.get_node(key, create=True)
        node.value = value

    def __delitem__(self, key):
        self._raise_if_invalid_type(key)
        try:
            node = self.get_node(key)
        except KeyError:
            return
        node.value = None

    def __contains__(self, key):
        try:
            _ = self[key]
            return True
        except KeyError:
            return False
        except TypeError:
            raise

    def __iter__(self):
        if self.value is not None:
            yield (self.type(), self.value)

        for prefix in self.children:
            for key, value in self.children[prefix]:
                yield (prefix + key, value)

 # ----------------------------------- #
 # 6.009 LAB 6 Trie Implementation END #
 # ----------------------------------- #


 ALPHABET = "abcdefghijklmnopqrstuvwxyz"


 def load_words(length):
    """ Load of words of length `length` and returns a Trie for them """
    # These words are coming from:
    # https://github.com/dwyl/english-words/blob/master/words.txt
    with open("words.txt", "r") as words_f:
        lines = words_f.read().splitlines()
        trie = Trie()
        chrhex_map = char_to_hex_map()
        for word in lines:
            # we only care about alphanumeric words (i.e., those
            # with characters in our alphabet).
            if not word.isalpha():
                continue
            if len(word) == length:
                hexs = tuple(chrhex_map[c] for c in word.lower())
                trie[hexs] = True
        return trie


 def gen_char_hexs():
    # ord(x) converts strings `x` into a number representing the
    # UTF-8 hex value. E.g., `ord("f") == 0x0066` returns `True`
    return (ord(l) for l in (ALPHABET))

 def get_char_hexs():
    return list(gen_char_hexs())

 def char_to_hex_map():
    return dict(zip(ALPHABET, gen_char_hexs()))

 def hex_to_char_map():
    return dict(zip(gen_char_hexs(), ALPHABET))

 def conv_hex_strs_to_int(hex_strings):
    return [int(hex_s, 16) for hex_s in hex_strings]

 def letter_pairs_with_xor_value(xor_value):
    """
    Returns a generator for which as a list would be a list
    of pair hex values such that for each pair (x, y):
    - x ^ y = xor_value
    - x and y are in get_char_hexs()
    - x <= y
    """
    for char1 in gen_char_hexs():
        for char2 in gen_char_hexs():
            if char1 > char2:
                continue
            if char1 ^ char2 == xor_value:
                yield (char1, char2)

 def run_problem_two_part_a():
    # first, grab the two cyphertexts and xor them together.
    # this result will be placed inside m1xorm2
    cypher_1 = "d3 a4 0a 3e 63 c2 13 3c 41 17 4d 57 85 bb"
    cypher_2 = "d1 a3 07 26 72 c3 04 20 50 04 5c 50 89 ac"
    cypher_1_ints = conv_hex_strs_to_int(cypher_1.split(" "))
    cypher_2_ints = conv_hex_strs_to_int(cypher_2.split(" "))
    # now, we have m1 xor m2. This gives us the difference for
    # for each pairs of letters.
    m1xorm2 = [a ^ b for a, b in zip(cypher_1_ints, cypher_2_ints)]
    # so, if they were encoded with the same key, then those differences
    # should give us a list of possible pairs of characters based on how
    # different they are.
    print(m1xorm2)
    trie = load_words(14)
    r1, r2 = solve_dfs(m1xorm2, trie, trie)
    if r1 is not None:
        print("message one:", "".join(MAP_HEX_CHR[c] for c in r1))
        print("message two:", "".join(MAP_HEX_CHR[c] for c in r2))
    else:
        print("Couldn't find any message one and two")


 MAP_CHR_HEX = char_to_hex_map()
 MAP_HEX_CHR = hex_to_char_map()


 def solve_dfs(m1xorm2, trie_1, trie_2):
    if len(m1xorm2) == 0:
        assert trie_1.value is not None
        assert trie_2.value is not None
        return [], []
    if (not trie_1.has_children()) or (not trie_2.has_children()):
        return None, None

    xor_value, rest_m1xorm2 = m1xorm2[0], m1xorm2[1:]
    for first_char_1, first_char_2 in letter_pairs_with_xor_value(xor_value):
        fc1 = (first_char_1,)
        fc2 = (first_char_2,)
        # try the first order fc1, fc2
        if fc1 in trie_1.children and fc2 in trie_2.children:
            fct_1 = trie_1.get_node(fc1)
            fct_2 = trie_2.get_node(fc2)
            assert fct_1 != trie_1
            assert fct_2 != trie_2
            rest_1, rest_2 = solve_dfs(rest_m1xorm2, fct_1, fct_2)
            if (rest_1 is not None) and (rest_2 is not None):
                return [fc1[0]] + rest_1, [fc2[0]] + rest_2

        # try the second order fc2, fc1
        # micro optimization if they are the same, just skip
        if first_char_1 == first_char_2:
            continue
        fc1, fc2 = fc2, fc1
        if fc1 not in trie_1.children:
            continue
        if fc2 not in trie_2.children:
            continue
        fct_1 = trie_1.get_node(fc1)
        fct_2 = trie_2.get_node(fc2)
        rest_1, rest_2 = solve_dfs(rest_m1xorm2, fct_1, fct_2)
        if (rest_1 is None) or (rest_2 is None):
            continue
        return [fc1[0]] + rest_1, [fc2[0]] + rest_2
    return None, None


 def test_dfs_solve_1():
    _m1xorm2 = [0x0, 0x4, 0x7]
    _trie = Trie()
    _trie[0x63, 0x61, 0x74] = 1
    _trie[0x63, 0x65, 0x73] = 1
    r1, r2 = solve_dfs(_m1xorm2, _trie, _trie)
    print(r1, r2)
    if r1 is None:
        raise ValueError("DFS Solve didn't work!")
    ew1 = "cat"
    ew2 = "ces"
    w1 = "".join(MAP_HEX_CHR[c] for c in r1)
    w2 = "".join(MAP_HEX_CHR[c] for c in r2)
    result = {w1, w2}
    if ew1 not in result:
        raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
    if ew2 not in result:
        raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
    print(f"DFS Solve test 2 Passed with {result}")


 def test_dfs_solve_2():
    m1xorm2 = [0x2, 0x1A, 0x8]
    trie = load_words(3)
    r1, r2 = solve_dfs(m1xorm2, trie, trie)
    if r1 is None:
        raise ValueError("DFS Solve didn't work!")
    ew1 = "xui"
    ew2 = "zoa"
    w1 = "".join(MAP_HEX_CHR[c] for c in r1)
    w2 = "".join(MAP_HEX_CHR[c] for c in r2)
    result = {w1, w2}
    if ew1 not in result:
        # fail only if m1xorm2 is wrong.
        if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
            raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
    if ew2 not in result:
        # fail only if m1xorm2 is wrong.
        if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
            raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
    print(f"DFS Solve test 2 Passed with {result}")


 def test_dfs_solve_3():
    m1xorm2 = [0xc, 0xd, 0xf, 0xd, 0x1b, 0x9, 0x1c]
    trie = load_words(7)
    r1, r2 = solve_dfs(m1xorm2, trie, trie)
    if r1 is None:
        raise ValueError("DFS Solve didn't work!")
    ew1 = "allergy"
    ew2 = "machine"
    w1 = "".join(MAP_HEX_CHR[c] for c in r1)
    w2 = "".join(MAP_HEX_CHR[c] for c in r2)
    result = {w1, w2}
    if ew1 not in result:
        # fail only if m1xorm2 is wrong.
        if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
            raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
    if ew2 not in result:
        # fail only if m1xorm2 is wrong.
        if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
            raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
    print(f"DFS Solve test 3 Passed with {result}")



 if __name__ == "__main__":
    test_dfs_solve_1()
    test_dfs_solve_2()
    test_dfs_solve_3()
    run_problem_two_part_a()
	# This is used to solve problem 1-2 of the problem set.
	# ------------------------------------- #
	# 6.009 LAB 6 Trie Implementation START #
	# ------------------------------------- #

	class Trie:
	ALLOWED_TYPES = [str, tuple]

	def __init__(self, type_=None):
	self.value = None
	self.children = {}
	if type_ not in Trie.ALLOWED_TYPES and type_ is not None:
	raise TypeError(f"Cannot initialize Trie with invalid type {str(type_)}")
	self.type = type_

	def _raise_if_invalid_type(self, key):
	if type(key) != self.type:
	if self.type is None:
	raise TypeError("Invalid type for this Trie. (Type is None)")
	raise TypeError("Invalid type for this Trie")

	def has_children(self):
	return len(self.children.keys()) > 0

	def get_node(self, key, create=False):
	self._raise_if_invalid_type(key)
	if len(key) == 0:
	return self
	char, rest = key[:1], key[1:]
	if char not in self.children:
	if not create:
	aslist = str(list(self))
	raise KeyError(
	f"Character {char} from key {key} is not in this Trie ({aslist})"
	)
	self.children[char] = Trie()
	self.children[char].type = self.type
	node = self.children[char]
	return node.get_node(rest, create)

	def __getitem__(self, key):
	if self.type is None:
	raise KeyError("Cannot get from an empty Trie")
	self._raise_if_invalid_type(key)
	node = self.get_node(key)
	if node.value is None:
	raise KeyError("This Trie doesn't have a value")
	return node.value

	def __setitem__(self, key, value):
	if type(key) not in Trie.ALLOWED_TYPES:
	raise TypeError(f"The type {str(type(key))} is not allowed in Tries.")
	if self.type is None:
	self.type = type(key)
	self._raise_if_invalid_type(key)
	node = self.get_node(key, create=True)
	node.value = value

	def __delitem__(self, key):
	self._raise_if_invalid_type(key)
	try:
	node = self.get_node(key)
	except KeyError:
	return
	node.value = None

	def __contains__(self, key):
	try:
	_ = self[key]
	return True
	except KeyError:
	return False
	except TypeError:
	raise

	def __iter__(self):
	if self.value is not None:
	yield (self.type(), self.value)

	for prefix in self.children:
	for key, value in self.children[prefix]:
	yield (prefix + key, value)

	# ----------------------------------- #
	# 6.009 LAB 6 Trie Implementation END #
	# ----------------------------------- #


	ALPHABET = "abcdefghijklmnopqrstuvwxyz"


	def load_words(length):
	""" Load of words of length `length` and returns a Trie for them """
	# These words are coming from:
	# https://github.com/dwyl/english-words/blob/master/words.txt
	with open("words.txt", "r") as words_f:
	lines = words_f.read().splitlines()
	trie = Trie()
	chrhex_map = char_to_hex_map()
	for word in lines:
	# we only care about alphanumeric words (i.e., those
	# with characters in our alphabet).
	if not word.isalpha():
	continue
	if len(word) == length:
	hexs = tuple(chrhex_map[c] for c in word.lower())
	trie[hexs] = True
	return trie


	def gen_char_hexs():
	# ord(x) converts strings `x` into a number representing the
	# UTF-8 hex value. E.g., `ord("f") == 0x0066` returns `True`
	return (ord(l) for l in (ALPHABET))

	def get_char_hexs():
	return list(gen_char_hexs())

	def char_to_hex_map():
	return dict(zip(ALPHABET, gen_char_hexs()))

	def hex_to_char_map():
	return dict(zip(gen_char_hexs(), ALPHABET))

	def conv_hex_strs_to_int(hex_strings):
	return [int(hex_s, 16) for hex_s in hex_strings]

	def letter_pairs_with_xor_value(xor_value):
	"""
	Returns a generator for which as a list would be a list
	of pair hex values such that for each pair (x, y):
	- x ^ y = xor_value
	- x and y are in get_char_hexs()
	- x <= y
	"""
	for char1 in gen_char_hexs():
	for char2 in gen_char_hexs():
	if char1 > char2:
	continue
	if char1 ^ char2 == xor_value:
	yield (char1, char2)

	def run_problem_two_part_a():
	# first, grab the two cyphertexts and xor them together.
	# this result will be placed inside m1xorm2
	cypher_1 = "d3 a4 0a 3e 63 c2 13 3c 41 17 4d 57 85 bb"
	cypher_2 = "d1 a3 07 26 72 c3 04 20 50 04 5c 50 89 ac"
	cypher_1_ints = conv_hex_strs_to_int(cypher_1.split(" "))
	cypher_2_ints = conv_hex_strs_to_int(cypher_2.split(" "))
	# now, we have m1 xor m2. This gives us the difference for
	# for each pairs of letters.
	m1xorm2 = [a ^ b for a, b in zip(cypher_1_ints, cypher_2_ints)]
	# so, if they were encoded with the same key, then those differences
	# should give us a list of possible pairs of characters based on how
	# different they are.
	print(m1xorm2)
	trie = load_words(14)
	r1, r2 = solve_dfs(m1xorm2, trie, trie)
	if r1 is not None:
	print("message one:", "".join(MAP_HEX_CHR[c] for c in r1))
	print("message two:", "".join(MAP_HEX_CHR[c] for c in r2))
	else:
	print("Couldn't find any message one and two")


	MAP_CHR_HEX = char_to_hex_map()
	MAP_HEX_CHR = hex_to_char_map()


	def solve_dfs(m1xorm2, trie_1, trie_2):
	if len(m1xorm2) == 0:
	assert trie_1.value is not None
	assert trie_2.value is not None
	return [], []
	if (not trie_1.has_children()) or (not trie_2.has_children()):
	return None, None

	xor_value, rest_m1xorm2 = m1xorm2[0], m1xorm2[1:]
	for first_char_1, first_char_2 in letter_pairs_with_xor_value(xor_value):
	fc1 = (first_char_1,)
	fc2 = (first_char_2,)
	# try the first order fc1, fc2
	if fc1 in trie_1.children and fc2 in trie_2.children:
	fct_1 = trie_1.get_node(fc1)
	fct_2 = trie_2.get_node(fc2)
	assert fct_1 != trie_1
	assert fct_2 != trie_2
	rest_1, rest_2 = solve_dfs(rest_m1xorm2, fct_1, fct_2)
	if (rest_1 is not None) and (rest_2 is not None):
	return [fc1[0]] + rest_1, [fc2[0]] + rest_2

	# try the second order fc2, fc1
	# micro optimization if they are the same, just skip
	if first_char_1 == first_char_2:
	continue
	fc1, fc2 = fc2, fc1
	if fc1 not in trie_1.children:
	continue
	if fc2 not in trie_2.children:
	continue
	fct_1 = trie_1.get_node(fc1)
	fct_2 = trie_2.get_node(fc2)
	rest_1, rest_2 = solve_dfs(rest_m1xorm2, fct_1, fct_2)
	if (rest_1 is None) or (rest_2 is None):
	continue
	return [fc1[0]] + rest_1, [fc2[0]] + rest_2
	return None, None


	def test_dfs_solve_1():
	_m1xorm2 = [0x0, 0x4, 0x7]
	_trie = Trie()
	_trie[0x63, 0x61, 0x74] = 1
	_trie[0x63, 0x65, 0x73] = 1
	r1, r2 = solve_dfs(_m1xorm2, _trie, _trie)
	print(r1, r2)
	if r1 is None:
	raise ValueError("DFS Solve didn't work!")
	ew1 = "cat"
	ew2 = "ces"
	w1 = "".join(MAP_HEX_CHR[c] for c in r1)
	w2 = "".join(MAP_HEX_CHR[c] for c in r2)
	result = {w1, w2}
	if ew1 not in result:
	raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
	if ew2 not in result:
	raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
	print(f"DFS Solve test 2 Passed with {result}")


	def test_dfs_solve_2():
	m1xorm2 = [0x2, 0x1A, 0x8]
	trie = load_words(3)
	r1, r2 = solve_dfs(m1xorm2, trie, trie)
	if r1 is None:
	raise ValueError("DFS Solve didn't work!")
	ew1 = "xui"
	ew2 = "zoa"
	w1 = "".join(MAP_HEX_CHR[c] for c in r1)
	w2 = "".join(MAP_HEX_CHR[c] for c in r2)
	result = {w1, w2}
	if ew1 not in result:
	# fail only if m1xorm2 is wrong.
	if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
	raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
	if ew2 not in result:
	# fail only if m1xorm2 is wrong.
	if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
	raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
	print(f"DFS Solve test 2 Passed with {result}")


	def test_dfs_solve_3():
	m1xorm2 = [0xc, 0xd, 0xf, 0xd, 0x1b, 0x9, 0x1c]
	trie = load_words(7)
	r1, r2 = solve_dfs(m1xorm2, trie, trie)
	if r1 is None:
	raise ValueError("DFS Solve didn't work!")
	ew1 = "allergy"
	ew2 = "machine"
	w1 = "".join(MAP_HEX_CHR[c] for c in r1)
	w2 = "".join(MAP_HEX_CHR[c] for c in r2)
	result = {w1, w2}
	if ew1 not in result:
	# fail only if m1xorm2 is wrong.
	if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
	raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
	if ew2 not in result:
	# fail only if m1xorm2 is wrong.
	if tuple(a ^ b for a, b in zip(r1, r2)) != tuple(m1xorm2):
	raise ValueError(f"DFS Solve didn't work correctly! It got: {result}")
	print(f"DFS Solve test 3 Passed with {result}")



	if __name__ == "__main__":
	test_dfs_solve_1()
	test_dfs_solve_2()
	test_dfs_solve_3()
	run_problem_two_part_a()