Created
October 29, 2023 06:45
-
-
Save jdf-id-au/a8aee871b7f2adf52080a0a641dd0361 to your computer and use it in GitHub Desktop.
Lempel-Ziv-Welch decoder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns lzw | |
"After https://github.com/pgodwin/OcfLzw" | |
(:import (java.io ByteArrayInputStream StringWriter))) | |
(defn read-bits | |
"Read `n` bits from `partial-byte` (most significant first) then `bais` as needed. | |
Return vector of value, new bits-remaining-of and new partial-byte. | |
Nil if requested number of bits not available or > Long.SIZE/2 ." | |
[n ^ByteArrayInputStream bais bits-remaining-of partial-byte] | |
(if (> n (/ Long/SIZE 2)) | |
nil | |
(loop [current-byte partial-byte ; e.g. 0000_0abc (indicating bit position) | |
current-remaining bits-remaining-of ; e.g. 3 | |
still-to-get n | |
ret (long 0)] ; e.g. 0000_0def (only 8 of 64 bits shown) | |
(case current-byte | |
-1 nil | |
(if (zero? still-to-get) | |
[ret current-remaining current-byte] | |
(if (zero? current-remaining) | |
(recur (.read bais) 8 still-to-get ret) | |
(recur current-byte (dec current-remaining) (dec still-to-get) | |
(bit-or (bit-shift-left ret 1) ; e.g. 0000_abc0 | |
(bit-and | |
(bit-shift-right current-byte (dec current-remaining)) ; e.g. 0000_000d | |
0x1))))))))) ; e.g. 0000_abcd | |
(defn decode | |
"Includes clear-table, end-of-data, variable chunk size." | |
[bytes] | |
(let [min-chunk-size 9 ; bits | |
early-change 1 | |
chunk-size (fn [table] (condp #(>= %2 (- %1 early-change)) (count table) | |
4096 13 2048 12 1024 11 512 10 9)) | |
bais (ByteArrayInputStream. bytes) | |
sw (StringWriter.) | |
new-table #(transient (into (mapv (comp str char) (range 256)) | |
[nil nil]))] ;; end-of-data and clear-table | |
(loop [last-command nil | |
[next-command cr cb] (read-bits min-chunk-size bais 0 (byte 0)) | |
table (new-table)] | |
(case next-command ; needs literals | |
;; abnormal end of data | |
nil (.toString sw) | |
;; clear-table | |
256 (recur nil (read-bits min-chunk-size bais cr cb) (new-table)) | |
;; end-of-data | |
257 (.toString sw) | |
;; else | |
(if-let [nc (get table next-command)] | |
(let [table (if last-command | |
(conj! table (str (table last-command) (first (table next-command)))) ; nb last next | |
table)] | |
(.write sw nc) | |
(recur next-command (read-bits (chunk-size table) bais cr cb) table)) | |
(let [new-thing (str (table last-command) (first (table last-command))) ; NB last last | |
table (conj! table new-thing)] | |
(.write sw new-thing) | |
(recur next-command (read-bits (chunk-size table) bais cr cb) table))))))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment