Last active
August 29, 2015 14:13
-
-
Save 46bit/163808ad968c8f62b70a to your computer and use it in GitHub Desktop.
ALPHAC text encrypter that I'm attacking for fun, reimplemented cleanly in Python. Source: http://www.myersdaily.org/joseph/javascript/alphac.html.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from random import randint | |
# ALPHAC encryption method | |
# From http://www.myersdaily.org/joseph/javascript/alphac.html | |
# This is a much cleaner Python reimplementation for cryptanalysis.
class Alphac:
    """ALPHAC text encrypter, reimplemented for cryptanalysis.

    Every symbol is a character of the 64-symbol alphabet ``c64``; its
    position in that alphabet is its 6-bit value. Encryption XORs each
    plaintext value with a key value, an md5(key) value and a one-symbol
    seed, then appends the seed symbol to the crypttext.
    """

    # The 64-symbol alphabet. A symbol's position is its numeric value (0..63).
    c64 = list("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")

    def alphac(self, s, k, r, crypt_mode):
        """Encrypt or decrypt ``s``.

        s = input string (only characters in c64); when decrypting, its
            final character is the seed appended during encryption
        k = key string (characters outside c64 are ignored)
        r = md5(k) string (32-digit hexadecimal, originally lowercase)
        crypt_mode = True for encrypt, False for decrypt

        Returns the resulting string. ``s`` is returned unchanged when
        ``s``, ``k`` or ``r`` is empty, or when ``k`` or ``r`` contains no
        c64 characters at all. Raises ValueError if ``s`` contains a
        character that is not in c64.
        """
        if len(s) == 0 or len(k) == 0 or len(r) == 0:
            return s

        k = self.c64_indexes(k)
        r = self.c64_indexes(r)
        if not k or not r:
            # Nothing usable is left after stripping, so there is no key
            # stream to XOR with (acraw would divide by zero).
            return s

        # N.B. No loop over blocks is needed here because a 2-block mode
        # using a | divider is never used.
        if crypt_mode:  # Encrypt
            # Pick a pseudorandom c64 value using Mersenne Twister, use it as
            # the acraw seed and append it to the crypttext for decryption use.
            dn = randint(0, 63)
            return self.acraw(s, k, r, dn) + self.c64[dn]

        # Decrypt: the final character of the crypttext (as appended during
        # encryption) is the acraw seed; everything before it is the payload.
        return self.acraw(s[:-1], k, r, self.c64.index(s[-1]))

    def c64_indexes(self, s):
        """alphar: strip non-c64 characters, convert the rest to 0..63 values.

        Returns a list of ints, one per c64 character of ``s``, in order.
        """
        return [self.c64.index(ch) for ch in s if ch in self.c64]

    def acraw(self, s, k, r, seed):
        """XOR every symbol of ``s`` with the key, md5 and seed values.

        s = string (or sequence) of c64 characters
        k = key as a non-empty list of 0..63 values, repeated cyclically
        r = md5(key) as a non-empty list of 0..63 values, repeated cyclically
        seed = single 0..63 value applied to every position

        Own inverse: acraw(acraw(s, k, r, seed), k, r, seed) == s.
        Returns a string. Raises ValueError if ``s`` contains a character
        that is not in c64.
        """
        k_len = len(k)
        r_len = len(r)
        return "".join(
            self.c64[self.c64.index(ch) ^ k[i % k_len] ^ r[i % r_len] ^ seed]
            for i, ch in enumerate(s)
        )
# ALPHAC.py encryption process. | |
# s = (i64, i64, ..., i64) | |
# k = key string = (i64, i64, ..., i64) | |
# r = md5(key string) = (i64, i64, ..., i64) # note: hex digits, so only 16 distinct values occur
# seed = pseudorandom i64 revealed in the crypttext | |
# s2[i] = s[i] ^ k[i % len(k)] ^ r[i % len(r)] ^ seed | |
# s3[i] = c64[s2[i]] | |
# result = (s3[0], s3[1], ... s3[n-1], c64[seed]) | |
# ALPHAC Encryption process. | |
# | |
# plaintext D U C K O F T H E D A Y | |
# ^ key P R A I S E H A S K E L | |
# ^ md5(key) 4 d 6 f 5 9 7 f 7 2 8 b | |
# = crypttext 0 Y 4 d l 8 v Y t / 4 I
# | |
# plaintext 03 20 02 10 14 05 19 07 04 03 00 24 | |
# ^ key 15 17 00 08 18 04 07 00 18 10 04 11 | |
# ^ md5(key) 56 29 58 31 57 61 59 31 59 54 60 27 | |
# = crypttext 52 24 56 29 37 60 47 24 45 63 56 08 | |
# | |
# N.B. As the seed (appended final letter of crypttext) can be XORed against | |
# every character of the crypttext to remove, I ignore it here. | |
# Thoughts on Crypto: | |
# - seed has no strength whatsoever (can remove from the crypttext independently) | |
# - i64(plaintext) ^ i64(key) ^ i64(md5(key)) looks to depend on md5 for strength | |
# - md5 can be computed en masse, but how much entropy is it adding? | |
# - each i64(md5(key)) comes from a 0..15 character, so 4 bits of entropy | |
# - the key adds 6 bits of entropy per character, the md5 another 4 | |
# - md5 entropy irremovable without relating key chars to md5(key) chars | |
# - md5 characters are evenly distributed across 0..15 | |
# - key characters would ideally be evenly distributed across i64 | |
# - key characters will follow language frequency distributions when the key is made of words
# - plaintext characters will have language frequency distribution | |
# IDEA: break like the Vigenere cipher.
# | |
# XOR Key is periodic with length LCM(key length, 32 = md5 length). | |
# Find key length by finding the most popular trigrams, taking the greatest
# common factor of the distances between repeats of those trigrams as the key length.
# Then frequency analyse between nth characters in each segment. | |
# I've done this all before. | |
# | |
# This requires a lot of text to work for Vigenere, and as the md5 adds 4 bits
# of entropy (16 possibilities) I figure we'll need 16 times as much text to get | |
# good results - but only for trigrams. Once we've discovered the key length, | |
# we can treat (nth character key ^ nth character of md5(key)) as a single key. | |
# | |
# If trigrams are rare enough, we might not be able to detect them - if the | |
# md5(key) noise outweighs the trigram signal. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment