Created
April 12, 2024 03:53
-
-
Save jevinskie/41d3bd7a01325bbb6aca11f861567c51 to your computer and use it in GitHub Desktop.
Python pretty byte histogram rendering for TUI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -> % ./byte_histogram.py -i ./byte_histogram.py | |
# 20 ' ' [ 1426]: ██████████████████████████████████████████████████████████████████████████████████████████████ | |
# e2 [ 427]: ████████████████████████████▏ | |
# 96 [ 427]: ████████████████████████████▏ | |
# 88 [ 337]: ██████████████████████▏ | |
# 0a ' ' [ 183]: ████████████ | |
# 27 ''' [ 171]: ███████████▎ | |
# 65 'e' [ 144]: █████████▍ | |
# 3a ':' [ 130]: ████████▌ | |
# 74 't' [ 129]: ████████▌ | |
# 69 'i' [ 120]: ███████▉ | |
# 72 'r' [ 116]: ███████▋ | |
# 5b '[' [ 104]: ██████▊ | |
# 5d ']' [ 104]: ██████▊ | |
# 23 '#' [ 96]: ██████▎ | |
# 73 's' [ 94]: ██████▏ | |
# 61 'a' [ 87]: █████▋ | |
# 6e 'n' [ 78]: █████▏ | |
# 5f '_' [ 76]: █████ | |
# 6c 'l' [ 69]: ████▌ | |
# 6f 'o' [ 59]: ███▉ | |
# 34 '4' [ 57]: ███▊ | |
# 66 'f' [ 56]: ███▋ | |
# 64 'd' [ 52]: ███▍ | |
# 6d 'm' [ 51]: ███▎ | |
# 31 '1' [ 49]: ███▏ | |
# 70 'p' [ 48]: ███▏ | |
# 33 '3' [ 46]: ███ | |
# 32 '2' [ 45]: ██▉ | |
# 22 '"' [ 45]: ██▉ | |
# 28 '(' [ 44]: ██▉ | |
# 29 ')' [ 44]: ██▉ | |
# 62 'b' [ 43]: ██▊ | |
# 63 'c' [ 42]: ██▊ | |
# 3d '=' [ 40]: ██▋ | |
# 67 'g' [ 33]: ██▏ | |
# 75 'u' [ 32]: ██ | |
# 68 'h' [ 31]: ██ | |
# 30 '0' [ 31]: ██ | |
# 37 '7' [ 30]: █▉ | |
# 35 '5' [ 30]: █▉ | |
# 79 'y' [ 29]: █▉ | |
# 38 '8' [ 29]: █▉ | |
# 36 '6' [ 29]: █▉ | |
# 2e '.' [ 28]: █▊ | |
# 2c ',' [ 25]: █▋ | |
# 39 '9' [ 20]: █▎ | |
# 8f [ 17]: █ | |
# 54 'T' [ 17]: █ | |
# 8e [ 16]: █ | |
# 77 'w' [ 16]: █ | |
# 2d '-' [ 14]: ▉ | |
# 8a [ 14]: ▉ | |
# 78 'x' [ 14]: ▉ | |
# 89 [ 12]: ▊ | |
# 46 'F' [ 12]: ▊ | |
# 49 'I' [ 12]: ▊ | |
# 6b 'k' [ 12]: ▊ | |
# 8b [ 11]: ▋ | |
# 48 'H' [ 11]: ▋ | |
# 4e 'N' [ 11]: ▋ | |
# 2b '+' [ 11]: ▋ | |
# 8c [ 10]: ▋ | |
# 8d [ 10]: ▋ | |
# 4d 'M' [ 10]: ▋ | |
# 2f '/' [ 9]: ▌ | |
# 3e '>' [ 9]: ▌ | |
# 42 'B' [ 9]: ▌ | |
# 41 'A' [ 9]: ▌ | |
# 44 'D' [ 8]: ▌ | |
# 55 'U' [ 7]: ▍ | |
# 7b '{' [ 7]: ▍ | |
# 7d '}' [ 7]: ▍ | |
# 45 'E' [ 6]: ▍ | |
# 43 'C' [ 6]: ▍ | |
# 50 'P' [ 5]: ▎ | |
# 52 'R' [ 5]: ▎ | |
# 57 'W' [ 5]: ▎ | |
# 4c 'L' [ 5]: ▎ | |
# 76 'v' [ 4]: ▎ | |
# 4f 'O' [ 4]: ▎ | |
# 47 'G' [ 4]: ▎ | |
# 53 'S' [ 4]: ▎ | |
# 7a 'z' [ 4]: ▎ | |
# 2a '*' [ 4]: ▎ | |
# 25 '%' [ 3]: ▏ | |
# 58 'X' [ 3]: ▏ | |
# 4b 'K' [ 3]: ▏ | |
# 40 '@' [ 3]: ▏ | |
# 3c '<' [ 3]: ▏ | |
# 21 '!' [ 2]: ▏ | |
# 7c '|' [ 2]: ▏ | |
# 5c '\' [ 2]: ▏ | |
# 71 'q' [ 2]: ▏ | |
import argparse | |
import sys | |
import termios | |
from collections import defaultdict | |
from pathlib import Path | |
from string import printable, whitespace | |
from typing import Final | |
class ByteHistogram(defaultdict): | |
MAX_TERM_WIDTH: Final[int] = 1024 | |
FALLBACK_TERM_WIDTH: Final[int] = 80 | |
COUNT_NUM_DIGITS: Final[int] = 5 | |
def __init__(self, include_zeros: bool = False): | |
super().__init__(int) | |
if include_zeros: | |
for i in range(0x100): | |
self[i] = 0 | |
@staticmethod | |
def get_bar_width(prefix_len: int) -> int: | |
try: | |
_, col = termios.tcgetwinsize(sys.stdin) | |
except Exception: | |
col = ByteHistogram.FALLBACK_TERM_WIDTH | |
col = min(col, ByteHistogram.MAX_TERM_WIDTH) | |
col = max(col - prefix_len, 0) | |
return col | |
@staticmethod | |
def block_str(percentage: float, width: int = 80) -> str: | |
full_width = width * 8 | |
num_blk = int(percentage * full_width) | |
full_blks = num_blk // 8 | |
partial_blks = num_blk % 8 | |
return "█" * full_blks + ("", "▏", "▎", "▍", "▌", "▋", "▊", "▉")[partial_blks] | |
def add_bytes(self, buf: bytes) -> None: | |
for b in buf: | |
if b >= 0xFF: | |
print("wtf: {b:02x}") | |
self[b] += 1 | |
def ascii_histogram(self, width: int | None = None): | |
assert all([0 <= b <= 0xFF for b in self.keys()]) | |
res = "" | |
sorted_self = dict(sorted(self.items(), key=lambda i: i[1], reverse=True)) | |
max_num = list(sorted_self.values())[0] | |
if width is None: | |
width = self.get_bar_width(2 + 1 + 3 + 2 + ByteHistogram.COUNT_NUM_DIGITS + 3) | |
for m, n in sorted_self.items(): | |
c = chr(m) | |
if c in printable: | |
byte_rep = f"'{c}'" | |
if c in whitespace: | |
byte_rep = "' '" | |
else: | |
byte_rep = " " | |
res += ( | |
f"{m:02x} {byte_rep} [{n:{ByteHistogram.COUNT_NUM_DIGITS}d}]: " | |
+ self.block_str(n / max_num, width=width) | |
+ "\n" | |
) | |
return res | |
def real_main(args) -> int:
    """Print a byte histogram of the file named by ``args.in_file``.

    Args:
        args: Parsed command-line namespace with an ``in_file`` path.

    Returns:
        Process exit status; always 0.
    """
    hist = ByteHistogram()
    # The context manager closes the file; chunked reads bound memory use.
    with open(args.in_file, "rb") as in_file:
        while chunk := in_file.read(1 << 20):
            hist.add_bytes(chunk)
    print(hist.ascii_histogram())
    return 0
def get_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser: a single required ``-i/--in-file`` path."""
    cli = argparse.ArgumentParser(description="byte_histogram.py")
    cli.add_argument(
        "-i",
        "--in-file",
        type=Path,
        required=True,
        help="Input path",
    )
    return cli
def main() -> int:
    """Parse the command line and run the tool, returning its exit status."""
    parsed_args = get_arg_parser().parse_args()
    status = real_main(parsed_args)
    return status
# Script entry point: exit with the status that main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment