Skip to content

Instantly share code, notes, and snippets.

@mrrodriguez
Last active June 27, 2024 13:59
Show Gist options
  • Save mrrodriguez/8f45767ffaca4efcc48f564033cee207 to your computer and use it in GitHub Desktop.
Save mrrodriguez/8f45767ffaca4efcc48f564033cee207 to your computer and use it in GitHub Desktop.
Basic Huffman encoding example
(ns metasimple.huffman.core
(:require
[clojure.pprint :as pp]
[clojure.string :as str]))
(defn- sorted-frequencies-map
[s]
(reduce (fn [fm c]
(update fm c (fnil inc 0)))
(array-map)
s))
(defn huffman-tree
[s]
(let [freq-map (sorted-frequencies-map s)
init-nodes (->> freq-map
(map (fn [[c n]] {:char-val c :value n}))
(sort-by :value))]
(loop [nodes init-nodes]
(if (< 1 (count nodes))
(let [[node1 node2 & rest-nodes] nodes
new-value (+ (:value node1) (:value node2))
new-node {:value new-value
:left node1
:right node2}]
(recur (->> new-node
(conj rest-nodes)
(sort-by :value))))
(first nodes)))))
(defn huffman-char-encoding-map
[tree]
(letfn [(subtree-paths [{:keys [left right char-val]}]
(if char-val
[[char-val]]
(let [left-paths (some-> left subtree-paths)
right-paths (some-> right subtree-paths)]
(into []
cat
[(map #(cons \0 %) left-paths)
(map #(cons \1 %) right-paths)]))))]
(reduce (fn [enc-map path]
(assoc enc-map (last path) (-> path butlast vec)))
{}
(subtree-paths tree))))
(defn huffman-encode
[s tree]
(let [enc-map (huffman-char-encoding-map tree)]
(reduce (fn [decoded sx]
(let [enc-path (get enc-map sx)]
(str decoded (str/join enc-path))))
""
s)))
(defn huffman-decode
[s tree]
(loop [decoded ""
[s1 & rest-s] s
subtree tree]
(if (nil? s1)
decoded
(let [direction (if (= \0 s1) :left :right)
{:keys [value char-val] :as new-subtree} (get subtree direction)]
(cond
char-val #_=>
(recur (str decoded char-val)
rest-s
tree)
value #_=>
(recur decoded
rest-s
new-subtree)
:else #_=>
(throw (ex-info "Bad encoding given" {:encoding s})))))))
(def tree1
(huffman-tree "bcaadddccacacac"))
(def encoding1
"01110011101")
(println "*********************************************")
(pp/pprint tree1)
(println "*********************************************")
(prn encoding1)
(prn (huffman-decode encoding1 tree1))
(println "*********************************************")
(prn "bad")
(prn (huffman-encode "bad" tree1))
(println "*********************************************")
(println "Round trip")
(prn (-> "bad"
(huffman-encode tree1)
(huffman-decode tree1)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment