Created
January 22, 2020 16:34
-
-
Save adicuco/e28be0fc8de1141d5d23916bdf20b68f to your computer and use it in GitHub Desktop.
Huffman encoding in Haskell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import System.IO | |
import Data.List | |
import Data.List.Utils | |
data Tree a = Leaf a | Node a (Tree a) (Tree a) deriving Show | |
getLeft :: Tree a -> Tree a | |
getLeft (Node _ x _) = x | |
getRight :: Tree a -> Tree a | |
getRight (Node _ _ y) = y | |
getSymbol :: Tree (a, b) -> a | |
getSymbol (Leaf (symbol, _)) = symbol | |
getSymbol (Node value _ _) = fst value | |
getWeight :: Tree (a, b) -> b | |
getWeight (Leaf (_, weight)) = weight | |
getWeight (Node value _ _) = snd value | |
-------------- Utils ----------------------- | |
-- lookup an element in an association list with key i | |
-- return element if found, or Nothing otherwise | |
lookup' :: Eq t => [(t, p)] -> t -> Maybe p | |
lookup' [] i = Nothing | |
lookup' (x:xs) i = if (fst x == i) | |
then Just (snd x) | |
else lookup' xs i | |
-- sort association list by comparing the values | |
sort' :: Ord b => [(a, b)] -> [(a, b)] | |
sort' lst = sortBy (\ a b -> compare (snd a) (snd b)) lst | |
-- sort lit of Tree by comparing the weights | |
sortTree :: Ord b => [Tree (a, b)] -> [Tree (a, b)] | |
sortTree tree = sortBy (\ a b -> compare (getWeight a) (getWeight b)) tree | |
-- create an array of leafs | |
mapToLeafs :: [a] -> [Tree a] | |
mapToLeafs lst = map (\x -> Leaf x) lst | |
-------------------------------------------- | |
--------------- Frequency ------------------ | |
-- create a list of form ([letter], 0) for each letter of a string | |
freqArray :: Num b => [a] -> [([a], b)] | |
freqArray [] = [] | |
freqArray (x:xs) = [([x], 0)] ++ freqArray xs | |
-- count and sort the appereances of a letter in the freqArray | |
combineFreqs :: Eq a => [(a, b)] -> [(a, Int)] -> [(a, Int)] | |
combineFreqs [] fs = sort' fs | |
combineFreqs (x:xs) fs = if lookup' fs (fst x) == Nothing | |
then combineFreqs xs (fs ++ [(fst x, count)]) | |
else combineFreqs xs fs | |
where count = length (filter (\y -> fst y == fst x) (x:xs)) | |
buildFreq :: Eq a => [a] -> [([a], Int)] | |
buildFreq txt = combineFreqs (freqArray txt) [] | |
-------------------------------------------- | |
--------------- Tree ----------------------- | |
-- construct a new Node with a b Leafs containg the combined symbols and weight of the leafs | |
buildNode :: Num b => Tree ([a], b) -> Tree ([a], b) -> Tree ([a], b) | |
buildNode a b = Node (getSymbol a ++ getSymbol b, getWeight a + getWeight b) (a) (b) | |
-- construct the Tree by recursively adding the first two Leafs together | |
buildTree :: (Num b, Ord b) => [Tree ([a], b)] -> Tree ([a], b) | |
buildTree [a, b] = [buildNode a b]!!0 | |
buildTree (x:y:xs) = buildTree (sortTree ([buildNode x y] ++ xs)) | |
-------------------------------------------- | |
--------------- Symbols Codes -------------- | |
-- traverse the tree to find the lenght one symbols | |
-- the path to that symbol is its code | |
symbolCodes :: Foldable t => Tree (t a, b) -> [Char] -> [Char] -> [(t a, [Char])] | |
symbolCodes node side code = if (length symbol == 1) | |
then [(symbol, newCode)] | |
else (symbolCodes (getLeft node) "0" newCode) ++ (symbolCodes (getRight node) "1" newCode) | |
where symbol = getSymbol node | |
newCode = code ++ side | |
-- construct the symbol codes from the tree | |
buildSymbolCodes :: Foldable t => Tree (t a, b) -> [(t a, [Char])] | |
buildSymbolCodes tree = symbolCodes tree "" "" | |
-------------------------------------------- | |
--------------- Encode --------------------- | |
-- replace each letter with its code | |
replaceSymbols :: Eq a => [a] -> [([a], [a])] -> [a] | |
replaceSymbols txt [] = txt | |
replaceSymbols txt (x:xs) = replaceSymbols (replace (fst x) (snd x) txt) xs | |
-- encode the original string | |
encode :: Eq a => [a] -> [([a], [a])] -> [a] | |
encode txt codes = replaceSymbols txt codes | |
-------------------------------------------- | |
--------------- Decode --------------------- | |
-- recurively traverse the encoded string and follow the bits down the Tree | |
-- until a length 1 symbol is found | |
decoder :: [Char] -> Tree ([a], b) -> Tree ([a], b) -> [a] -> [a] | |
decoder [] tree node original = original | |
decoder (x:xs) tree node original = if (length symbol == 1) | |
then decoder xs tree tree (original ++ symbol) | |
else decoder xs tree leaf original | |
where leaf = if (x == '0') | |
then getLeft node | |
else getRight node | |
symbol = getSymbol leaf | |
-- decode the string using the Tree | |
decode :: [Char] -> Tree ([Char], b) -> [Char] | |
decode txt tree = decoder txt tree tree "" | |
-------------------------------------------- | |
main :: IO () | |
main = do | |
putStrLn "Insert text to encode:" | |
input <- getLine | |
putStrLn "------------------------------------------------" | |
putStr "original: " | |
putStrLn input | |
let frequency = (buildFreq input) | |
let tree = buildTree (mapToLeafs frequency) | |
let codes = buildSymbolCodes tree | |
let encoded = encode input codes | |
let decoded = decode encoded tree | |
putStr "encoded: " | |
putStrLn encoded | |
putStr "decoded: " | |
putStrLn decoded | |
putStrLn "------------------------------------------------" | |
main |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment