Created
December 30, 2016 10:11
-
-
Save antgustech/2314c0079e67dadac32f58b90d02f920 to your computer and use it in GitHub Desktop.
HuffmanTree Java implementation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.Hashtable; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.PriorityQueue; | |
import java.util.stream.Collectors; | |
/** | |
*A huffman tree implementation. To test it, | |
*create a new instance of this class and | |
*pass in a string to encode. | |
*This class doesn't encode anything in reality | |
*but merley shows the encoding as strings. | |
*/ | |
public class HuffmanTree { | |
private PriorityQueue<Node> pq; | |
private Node[] nodeArray; | |
private String s; | |
private char charArr[]; | |
private int charFreqs[]; | |
/** | |
* Constructor that initializes variables and arrays. | |
* | |
* @param s - the string to compress. | |
*/ | |
public HuffmanTree(String s) { | |
this.s = s; | |
buildArrays(); | |
pq = new PriorityQueue<Node>(charFreqs.length); | |
nodeArray = new Node[charArr.length]; | |
// Create Nodes from the chars and occurrences and put them in a array. | |
for (int i = 0; i < charArr.length; i++) { | |
nodeArray[i] = new Node(charArr[i], charFreqs[i]); | |
} | |
buildTree(); | |
} | |
private void buildTree() { | |
Node left, right, top; | |
// Put the Nodes from the node array in a min-heap/priorityQueue. | |
for (int i = 0; i < nodeArray.length; i++) { | |
pq.add(nodeArray[i]); | |
} | |
// Find two trees with least freq and creates a new node and inserts it. | |
while (pq.size() > 1) { | |
left = pq.remove(); | |
right = pq.remove(); | |
int newFreq = left.getFreq() + right.getFreq(); | |
top = new Node('$', newFreq, left, right); | |
pq.add(top); | |
} | |
// Now the min heap only contains one node with the character $ | |
// and it has all the other nodes as children. | |
// It's frequency should be the same as the total | |
// number of characters in the string. | |
// This is our complete tree. | |
encode(pq.remove(), ""); | |
} | |
/** | |
* Set's the encoding for every node by depth first traversal through the | |
* tree. | |
* | |
* @param n - the current node. | |
* @param c - the code for the current node. | |
*/ | |
private void encode(Node n, String c) { | |
if (!n.isLeafNode()) { | |
// While going left append 0 | |
// System.out.println("LEFT"); | |
c += 0; | |
encode(n.getLeft(), c); | |
// while going right, append 1 | |
// System.out.println("RIGHT"); | |
c += 1; | |
encode(n.getRight(), c); | |
} else { | |
// System.out.println("LEAF-" + code); | |
if (c.length() > 0) { | |
c = c.substring(0, c.length() - 1); // Removes one zero | |
} | |
// Set the code of the node. | |
n.setCode(String.valueOf(c)); | |
} | |
} | |
/** | |
* Finds occurencess of each letter in the given string and initializes the | |
* arrays containing the letters and their frequencies. | |
* | |
* @param s | |
*/ | |
private void buildArrays() { | |
List<String> original = s.chars().mapToObj(i -> (char) i).map(String::valueOf).collect(Collectors.toList()); | |
List<String> duplicateRemoved = s.chars().mapToObj(i -> (char) i).map(String::valueOf).distinct() | |
.collect(Collectors.toList()); | |
ArrayList<Integer> Occurrences = new ArrayList<>(); | |
int counter = 1; | |
for (String aList : duplicateRemoved) { | |
counter = (int) original.stream().filter(s1 -> s1.equals(aList)).count(); | |
Occurrences.add(counter); | |
} | |
// Assign the values to the arrays: | |
charFreqs = new int[duplicateRemoved.size()]; | |
charArr = new char[duplicateRemoved.size()]; | |
for (int i = 0; i < charArr.length; i++) { | |
charArr[i] = duplicateRemoved.get(i).charAt(0); | |
charFreqs[i] = Occurrences.get(i); | |
} | |
} | |
/** | |
* Loops through the nodes and arrays to print their values. Also does some | |
* calculations to show the number of bits and the percentage. | |
*/ | |
public void printEncoding() { | |
int bits = 0; | |
Map<Character, String> ht = new Hashtable<Character, String>(); | |
System.out.println("Char Freq Code"); | |
for (Node n : nodeArray) { | |
bits += n.getFreq() * n.getCode().length(); | |
System.out.println("'" + n.getData() + "' -- " + n.getFreq() + " -- '" + n.getCode() + "'"); | |
ht.put(n.getData(), n.getCode()); | |
} | |
System.out.println("'" + s + "'" + " is encoded as:"); | |
char[] arr = s.toCharArray(); | |
for (char c : arr) { | |
System.out.print(ht.get(c) + " "); | |
} | |
int original = (s.length() * 16); | |
int difference = (s.length() * 16) - bits; | |
float p1 = bits * 1f / original; | |
float p2 = (1 - p1) * 100; | |
System.out.println("\nOrg compr diff percent"); | |
System.out.println(original + "----" + bits + "----" + difference + "----" + p2 + "%"); | |
System.out.println("\n \n \n"); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* One node in the Huffman tree. | |
* | |
* @author Anton Gustafsson | |
* | |
*/ | |
public class Node implements Comparable<Node> { | |
private char data; | |
private int freq; | |
private Node left; | |
private Node right; | |
private String code; | |
public Node(char data, int freq) { | |
this.data = data; | |
this.freq = freq; | |
} | |
public Node(char data, int freq, Node left, Node right) { | |
this.data = data; | |
this.freq = freq; | |
this.left = left; | |
this.right = right; | |
} | |
public char getData() { | |
return data; | |
} | |
public Node getLeft() { | |
return left; | |
} | |
public Node getRight() { | |
return right; | |
} | |
public String getCode() { | |
return code; | |
} | |
public void setCode(String code) { | |
this.code = code; | |
} | |
public int getFreq() { | |
return freq; | |
} | |
/** | |
* If left or right child is null, this is a leaf. | |
* | |
* @return | |
*/ | |
public boolean isLeafNode() { | |
return left == null && right == null; | |
} | |
/** | |
* Needs to be able to compare nodes in the order of their frequencies for | |
* the heap. | |
*/ | |
@Override | |
public int compareTo(Node o) { | |
if (o instanceof Node) { | |
return freq - ((Node) o).freq; | |
} | |
return -1; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment