Created
January 11, 2015 20:48
-
-
Save ArmedGuy/867137f4d9c6f21ca5d5 to your computer and use it in GitHub Desktop.
Implementation of huffman encoding with no regards to performance. Only idiots would use this
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class BinaryTree: | |
def __init__(self, rootNode): | |
self.root = rootNode | |
def __getNodes(self): | |
return [self.root] + self.root.nodes | |
nodes = property(__getNodes) | |
class BinaryNode: | |
def __init__(self, name, value): | |
self.n1 = None | |
self.n2 = None | |
self.parent = None | |
self.name = name | |
self.value = value | |
def __str__(self): | |
return str(self.value) | |
def __getNodes(self): | |
l = [] | |
if self.n1 != None: | |
l.append(self.n1) | |
l = l + self.n1.nodes | |
if self.n2 != None: | |
l.append(self.n2) | |
l = l + self.n2.nodes | |
return l | |
nodes = property(__getNodes) | |
def smallestNodes(nodes): | |
n1 = -1 | |
n2 = -1 | |
for i in xrange(len(nodes)): | |
if n1 == -1 or nodes[i].value <= nodes[n1].value: | |
n2 = n1 | |
n1 = i | |
elif n2 == -1 or nodes[i].value < nodes[n2].value: | |
n2 = i | |
return (n1, n2) | |
def huffmanWeights(inp): | |
l = list(inp) | |
return [(k, l.count(k)) for k in set(l)] | |
def huffmanTree(values): | |
nodes = [] | |
for v in values: | |
nodes.append(BinaryNode(v[0], v[1])) | |
while True: | |
sm = smallestNodes(nodes) | |
if sm[0] == -1 or sm[1] == -1: | |
return BinaryTree(nodes[0]) | |
newNode = BinaryNode(nodes[sm[0]].name + nodes[sm[1]].name, nodes[sm[0]].value + nodes[sm[1]].value) | |
newNode.n1 = nodes[sm[0]] | |
newNode.n2 = nodes[sm[1]] | |
nodes[sm[0]].parent = newNode | |
nodes[sm[1]].parent = newNode | |
if sm[0] > sm[1]: | |
del nodes[sm[0]] | |
del nodes[sm[1]] | |
else: | |
del nodes[sm[1]] | |
del nodes[sm[0]] | |
nodes.append(newNode) | |
def huffmanPath(n): | |
path = [] | |
c = n | |
while c.parent != None: | |
path.append(1 if c.parent.n1 == c else 0) | |
c = c.parent | |
return path[::-1] | |
def huffmanEncode(tree, message): | |
enc = [] | |
for i in xrange(len(message)): | |
n = [n for n in tree.nodes if n.name == message[i]][0] | |
enc = enc + huffmanPath(n) | |
return enc | |
def huffmanDecode(tree, encoded): | |
message = [] | |
c = tree.root | |
for e in encoded: | |
c = c.n1 if e == 1 else c.n2 | |
if c.n1 == None or c.n2 == None: | |
message.append(c.name) | |
c = tree.root | |
return ''.join(message) | |
if __name__ == '__main__': | |
import math | |
message = "Bacon ipsum dolor amet pork kevin short ribs sirloin, consequat incididunt nisi strip steak spare ribs. Fugiat beef ribs capicola meatball consectetur t-bone shankle et, alcatra commodo venison bresaola ribeye. Elit in spare ribs ea aliqua chicken sed ham quis velit ad cillum. Eiusmod ut t-bone meatloaf. Tail consectetur voluptate pariatur shankle dolore eu non pork in pork loin tri-tip venison drumstick. T-bone non short loin, incididunt pig shank meatloaf ut turducken velit." | |
vals = huffmanWeights(message) # calculate weights | |
tree = huffmanTree(vals) # build tree | |
enc = huffmanEncode(tree, message) # encode | |
print "----------------------------------" | |
print "Encoded: " | |
encStr = [] | |
enc = enc + [0] * (8 - len(enc) % 8) | |
for b in range(len(enc) / 8): | |
byte = enc[b*8:(b+1)*8] | |
encStr.append(chr(int(''.join([str(bit) for bit in byte]), 2))) | |
print ''.join(encStr) | |
print "----------------------------------" | |
print math.ceil(len(enc) / 8.0), "bytes vs", len(message), "bytes" | |
print "ratio", ((len(message) * 8.0) / len(enc)) | |
print "----------------------------------" | |
print "Decoded: " | |
print huffmanDecode(tree, enc) | |
print "----------------------------------" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment