Last active
June 18, 2020 09:56
-
-
Save genmeblog/3f12c557cfe082d11f4ed25ee5ef3b07 to your computer and use it in GitHub Desktop.
Format sequence of doubles to seq of aligned strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns format-sequence | |
"Format sequence of doubles" | |
(:require [clojure.pprint :refer [cl-format]] | |
[clojure.test :refer [deftest is]])) | |
(set! *warn-on-reflection* true) | |
(set! *unchecked-math* :warn-on-boxed) | |
;; maximum double power for precise calculations | |
(def ^:private ^:const ^long kp-max 22) | |
;; powers for scientific notation | |
(def ^:private tbl [1e-1, | |
1e00, 1e01, 1e02, 1e03, 1e04, 1e05, 1e06, 1e07, 1e08, 1e09, | |
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, | |
1e20, 1e21, 1e22]) | |
(defn- left | |
"What is the power of number" | |
^long [^double x] | |
(-> x Math/log10 Math/floor unchecked-long inc)) | |
(defn- find-nsig | |
"Shift decimal places until non-zero value is found" | |
^long [^double alpha ^long digits] | |
(loop [a alpha | |
d digits] | |
(let [a- (/ a 10.0)] | |
(if (and (= a- (Math/floor a-))) | |
(recur a- (dec d)) | |
(max 1 d))))) | |
(defn- right | |
"Calculate maximum digits on the right side of the dot." | |
^long [^double x ^long digits] | |
(let [alpha (Math/round (* x ^double (tbl (inc digits))))] | |
(if (zero? alpha) | |
1 | |
(find-nsig alpha digits)))) | |
(defn- fix-left | |
"Fix number of digits on the left side. For scientific notations and non-positive exponent (lft) it should be leading digits + sign." | |
[^double x ^long lft e?] | |
(let [sgn (if (neg? x) 1 0)] | |
(if (or e? (not (pos? lft))) | |
(+ sgn 1) | |
(+ sgn lft)))) | |
(defn- precision | |
[^double x ^long digits ^long threshold] | |
(if (zero? x) | |
[false 0 1 1] ;; zero is reprezented as 0.0 | |
(let [digits (max 1 (min 10 digits)) ;; constrain digits to 1-10 range | |
r (Math/abs x) | |
lft (left r) ;; digits on the left side of dot | |
alft (Math/abs lft) | |
e? (>= alft threshold) | |
r-prec (cond | |
(< alft threshold) r ;; normal number | |
(< alft kp-max) (if (neg? lft) ;; scientific number (using table to shift values) | |
(* r ^double (tbl (inc (- lft)))) | |
(/ r ^double (tbl (inc lft)))) | |
:else (/ r (Math/pow 10.0 (dec lft)))) ;; very big or very small case | |
rght (right r-prec digits) ;; desired precision on the right side | |
exp (if (> alft 100) 3 2) ;; size of the exponent | |
lft (fix-left x lft e?)] | |
[e? exp lft rght]))) | |
(defn- fit-precision | |
"Find best matching presision for given sequence." | |
[xs ^long digits ^long threshold] | |
(reduce (fn [[ce? ^long cexp ^long clft ^long crght ^long non-finite-len] x] | |
(let [^double x (if (instance? Float x) | |
(Double/valueOf (str x)) | |
(or x ##NaN))] | |
(if (Double/isFinite x) | |
(let [[e? ^long exp ^long lft ^long rght] (precision x digits threshold)] | |
(if (and e? (pos? threshold)) | |
(reduced (fit-precision xs digits 0)) ;; switch to scientific notation | |
[(or e? ce?) | |
(max exp cexp) | |
(max lft clft) | |
(max rght crght) | |
non-finite-len])) | |
[ce? cexp clft crght (max non-finite-len (if (= x ##-Inf) 4 3))]))) | |
[false Integer/MIN_VALUE Integer/MIN_VALUE Integer/MIN_VALUE 0] xs)) | |
;; public functions | |
(defn formatter | |
"Create formatter for given: | |
* `xs` - sequence of doubles | |
* `digits` - maximum precision | |
* `threshold` - what is absolute power to switch to scientific notation | |
Returns formatter." | |
([xs] (formatter xs 8)) | |
([xs ^long digits] (formatter xs digits 8)) | |
([xs ^long digits ^long threshold] | |
(let [[e? ^long exp ^long lft ^long rght ^long non-finite-len] (fit-precision xs digits threshold) | |
w (max non-finite-len (if e? | |
(+ lft rght exp 3) ;; 3 = "." + sign of E + "E" | |
(+ lft rght 1))) ;; 1 for "." | |
fmt (if e? | |
(str "~" w "," rght "," exp "E") | |
(str "~" w "," rght "F")) | |
non-finite-fmt (str "~" w "@A")] | |
(fn [x] | |
(let [^double x (or x ##NaN)] | |
(if (Double/isFinite x) | |
(cl-format nil fmt x) | |
(cl-format nil non-finite-fmt (cond | |
(== ##Inf x) "Inf" | |
(== ##-Inf x) "-Inf" | |
:else "NaN")))))))) | |
(defn format-sequence | |
"Format sequence of double for given: | |
* `xs` - sequence of doubles | |
* `digits` - maximum precision | |
* `threshold` - what is absolute power to switch to scientific notation | |
Returns sequence of strings." | |
([xs] (format-sequence xs 8)) | |
([xs ^long digits] (format-sequence xs digits 8)) | |
([xs ^long digits ^long threshold] | |
(let [fmt (formatter xs digits threshold)] | |
(map fmt xs)))) | |
;; cases | |
(def a [0.000001 0.00001 0.0001 0.001 0.01 0.1 0.0 | |
1.0 10.0 100.0 1000.0 10000.0 100000.0]) | |
(format-sequence a) | |
;; => (" 0.000001" | |
;; " 0.000010" | |
;; " 0.000100" | |
;; " 0.001000" | |
;; " 0.010000" | |
;; " 0.100000" | |
;; " 0.000000" | |
;; " 1.000000" | |
;; " 10.000000" | |
;; " 100.000000" | |
;; " 1000.000000" | |
;; " 10000.000000" | |
;; "100000.000000") | |
(format-sequence a 5 4) | |
;; => ("1.0E-06" | |
;; "1.0E-05" | |
;; "1.0E-04" | |
;; "1.0E-03" | |
;; "1.0E-02" | |
;; "1.0E-01" | |
;; "0.0E+00" | |
;; "1.0E+00" | |
;; "1.0E+01" | |
;; "1.0E+02" | |
;; "1.0E+03" | |
;; "1.0E+04" | |
;; "1.0E+05") | |
(def b [10.0 10.1 10.11 10.111 10.1111 10.11111 | |
1.0 1.1 1.11 1.111 1.1111 1.11111 | |
0.0 0.1 0.11 0.111 0.1111 -0.11111]) | |
(format-sequence b) | |
;; => ("10.00000" | |
;; "10.10000" | |
;; "10.11000" | |
;; "10.11100" | |
;; "10.11110" | |
;; "10.11111" | |
;; " 1.00000" | |
;; " 1.10000" | |
;; " 1.11000" | |
;; " 1.11100" | |
;; " 1.11110" | |
;; " 1.11111" | |
;; " 0.00000" | |
;; " 0.10000" | |
;; " 0.11000" | |
;; " 0.11100" | |
;; " 0.11110" | |
;; "-0.11111") | |
(format-sequence b 5 2) | |
;; => (" 1.00000E+01" | |
;; " 1.01000E+01" | |
;; " 1.01100E+01" | |
;; " 1.01110E+01" | |
;; " 1.01111E+01" | |
;; " 1.01111E+01" | |
;; " 1.00000E+00" | |
;; " 1.10000E+00" | |
;; " 1.11000E+00" | |
;; " 1.11100E+00" | |
;; " 1.11110E+00" | |
;; " 1.11111E+00" | |
;; " 0.00000E+00" | |
;; " 1.00000E-01" | |
;; " 1.10000E-01" | |
;; " 1.11000E-01" | |
;; " 1.11100E-01" | |
;; "-1.11110E-01") | |
(def c (range -5 4 0.8795833)) | |
(format-sequence c) | |
;; => ("-5.0000000" | |
;; "-4.1204167" | |
;; "-3.2408334" | |
;; "-2.3612501" | |
;; "-1.4816668" | |
;; "-0.6020835" | |
;; " 0.2774998" | |
;; " 1.1570831" | |
;; " 2.0366664" | |
;; " 2.9162497" | |
;; " 3.7958330") | |
(format-sequence c 4) | |
;; => ("-5.0000" | |
;; "-4.1204" | |
;; "-3.2408" | |
;; "-2.3613" | |
;; "-1.4817" | |
;; "-0.6021" | |
;; " 0.2775" | |
;; " 1.1571" | |
;; " 2.0367" | |
;; " 2.9162" | |
;; " 3.7958") | |
(format-sequence c 4 0) | |
;; => ("-5.0000E+00" | |
;; "-4.1204E+00" | |
;; "-3.2408E+00" | |
;; "-2.3613E+00" | |
;; "-1.4817E+00" | |
;; "-6.0208E-01" | |
;; " 2.7750E-01" | |
;; " 1.1571E+00" | |
;; " 2.0367E+00" | |
;; " 2.9162E+00" | |
;; " 3.7958E+00") | |
(def d [-1.0e-20 -1.334e-100 3.43e100 4.556e20 | |
1.0e-20 1.334e-100 -3.43e100 -41.556e20 | |
0.999e-300 -0.999e300]) | |
(format-sequence d) | |
;; => ("-1.0000E-020" | |
;; "-1.3340E-100" | |
;; " 3.4300E+100" | |
;; " 4.5560E+020" | |
;; " 1.0000E-020" | |
;; " 1.3340E-100" | |
;; "-3.4300E+100" | |
;; "-4.1556E+021" | |
;; " 9.9900E-301" | |
;; "-9.9900E+299") | |
(def e [-1.0e99 1.0e99]) | |
(format-sequence e) | |
;; => ("-1.0E+99" " 1.0E+99") | |
(def f [-1.0e100 1.0e100]) | |
(format-sequence f) | |
;; => ("-1.0E+100" " 1.0E+100") | |
(def g [0.002 0.0002 0.000333 0.1 -0.0003 0.0]) | |
(format-sequence g) | |
;; => (" 0.002000" | |
;; " 0.000200" | |
;; " 0.000333" | |
;; " 0.100000" | |
;; "-0.000300" | |
;; " 0.000000") | |
(def h [0.002 0.0002 0.00333 0.00001 -0.0003 0.022 0.0001]) | |
(format-sequence h) | |
;; => (" 0.00200" | |
;; " 0.00020" | |
;; " 0.00333" | |
;; " 0.00001" | |
;; "-0.00030" | |
;; " 0.02200" | |
;; " 0.00010") | |
(def i [10.0 ##NaN ##Inf ##-Inf 100 0.001 nil]) | |
(format-sequence i) | |
;; => (" 10.000" | |
;; " NaN" | |
;; " Inf" | |
;; " -Inf" | |
;; "100.000" | |
;; " 0.001" | |
;; " NaN") | |
(format-sequence i 0 0) | |
;; => ("1.0E+01" | |
;; " NaN" | |
;; " Inf" | |
;; " -Inf" | |
;; "1.0E+02" | |
;; "1.0E-03" | |
;; " NaN") | |
(def j (map float [39.81 36.35 43.22 28.37 25.45 | |
-39.81 36.351 43.221 28.371 25.451])) | |
(format-sequence j) | |
;; => (" 39.810" | |
;; " 36.350" | |
;; " 43.220" | |
;; " 28.370" | |
;; " 25.450" | |
;; "-39.810" | |
;; " 36.351" | |
;; " 43.221" | |
;; " 28.371" | |
;; " 25.451") | |
(deftest regression-tests | |
(is (= (format-sequence j) | |
'(" 39.810" " 36.350" " 43.220" " 28.370" " 25.450" "-39.810" " 36.351" " 43.221" " 28.371" " 25.451"))) | |
(is (= (format-sequence i 0 0) | |
'("1.0E+01" " NaN" " Inf" " -Inf" "1.0E+02" "1.0E-03" " NaN"))) | |
(is (= (format-sequence a) | |
'(" 0.000001" " 0.000010" " 0.000100" " 0.001000" " 0.010000" " 0.100000" " 0.000000" " 1.000000" " 10.000000" " 100.000000" " 1000.000000" " 10000.000000" "100000.000000"))) | |
(is (= (format-sequence a 5 4) | |
'("1.0E-06" "1.0E-05" "1.0E-04" "1.0E-03" "1.0E-02" "1.0E-01" "0.0E+00" "1.0E+00" "1.0E+01" "1.0E+02" "1.0E+03" "1.0E+04" "1.0E+05"))) | |
(is (= (format-sequence b) | |
'("10.00000" "10.10000" "10.11000" "10.11100" "10.11110" "10.11111" " 1.00000" " 1.10000" " 1.11000" " 1.11100" " 1.11110" " 1.11111" " 0.00000" " 0.10000" " 0.11000" " 0.11100" " 0.11110" "-0.11111"))) | |
(is (= (format-sequence b 5 2) | |
'(" 1.00000E+01" " 1.01000E+01" " 1.01100E+01" " 1.01110E+01" " 1.01111E+01" " 1.01111E+01" " 1.00000E+00" " 1.10000E+00" " 1.11000E+00" " 1.11100E+00" " 1.11110E+00" " 1.11111E+00" " 0.00000E+00" " 1.00000E-01" " 1.10000E-01" " 1.11000E-01" " 1.11100E-01" "-1.11110E-01"))) | |
(is (= (format-sequence c) | |
'("-5.0000000" "-4.1204167" "-3.2408334" "-2.3612501" "-1.4816668" "-0.6020835" " 0.2774998" " 1.1570831" " 2.0366664" " 2.9162497" " 3.7958330"))) | |
(is (= (format-sequence c 4) | |
'("-5.0000" "-4.1204" "-3.2408" "-2.3613" "-1.4817" "-0.6021" " 0.2775" " 1.1571" " 2.0367" " 2.9162" " 3.7958"))) | |
(is (= (format-sequence c 4 0) | |
'("-5.0000E+00" "-4.1204E+00" "-3.2408E+00" "-2.3613E+00" "-1.4817E+00" "-6.0208E-01" " 2.7750E-01" " 1.1571E+00" " 2.0367E+00" " 2.9162E+00" " 3.7958E+00"))) | |
(is (= (format-sequence d) | |
'("-1.0000E-020" "-1.3340E-100" " 3.4300E+100" " 4.5560E+020" " 1.0000E-020" " 1.3340E-100" "-3.4300E+100" "-4.1556E+021" " 9.9900E-301" "-9.9900E+299"))) | |
(is (= (format-sequence e) | |
'("-1.0E+99" " 1.0E+99"))) | |
(is (= (format-sequence f) | |
'("-1.0E+100" " 1.0E+100"))) | |
(is (= (format-sequence g) | |
'(" 0.002000" " 0.000200" " 0.000333" " 0.100000" "-0.000300" " 0.000000"))) | |
(is (= (format-sequence h) | |
'(" 0.00200" " 0.00020" " 0.00333" " 0.00001" "-0.00030" " 0.02200" " 0.00010"))) | |
(is (= (format-sequence i) | |
'(" 10.000" " NaN" " Inf" " -Inf" "100.000" " 0.001" " NaN"))) | |
(is (= (format-sequence i 0 0) | |
'("1.0E+01" " NaN" " Inf" " -Inf" "1.0E+02" "1.0E-03" " NaN"))) | |
(is (= (format-sequence j) | |
'(" 39.810" " 36.350" " 43.220" " 28.370" " 25.450" "-39.810" " 36.351" " 43.221" " 28.371" " 25.451")))) | |
(regression-tests) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment