Last active
July 26, 2019 17:23
-
-
Save numberoverzero/d2ae0af4dd0b5fdc7f0d2fec06590418 to your computer and use it in GitHub Desktop.
support integral base string representations, especially for lexicographical sorting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def in_base(n: int, alphabet: str) -> str: | |
b = len(alphabet) | |
s = [] | |
while n > 0: | |
d = n % b | |
s.append(alphabet[d]) | |
n //= b | |
return "".join(reversed(s)) | |
def from_base(s: str, alphabet: str): | |
alphabet = {c: i for (i, c) in enumerate(alphabet)} | |
b = len(alphabet) | |
n = 0 | |
for i, c in enumerate(reversed(s)): | |
v = alphabet[c] | |
n += v * b**i | |
return n | |
b64lexi = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{}" | |
# 'JI' | |
print(in_base(1234, lexicographical_b64, 4)) | |
# '}}}}' | |
print(in_base(64**4-1, lexicographical_b64, 4)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import overload | |
class IntPacking: | |
def __init__(self, alphabet): | |
self._i2c = alphabet | |
self._c2i = {c: i for (i, c) in enumerate(alphabet)} | |
self._b = len(alphabet) | |
@property | |
def base(self) -> int: | |
return self._b | |
@property | |
def alphabet(self) -> str: | |
return self._i2c | |
@overload | |
def __call__(self, x: str) -> int: | |
... | |
@overload | |
def __call__(self, x: int) -> str: | |
... | |
def __call__(self, x): | |
if isinstance(x, int): | |
func = self.to_str | |
elif isinstance(x, str): | |
func = self.to_int | |
return func(x) | |
def to_str(self, n: int) -> str: | |
s, b, alphabet = [], self._b, self._i2c | |
while n > 0: | |
n, d = divmod(n, b) | |
s.append(alphabet[d]) | |
return "".join(reversed(s)) | |
def to_int(self, s: str) -> int: | |
n, b, alphabet = 0, self._b, self._c2i | |
for i, c in enumerate(reversed(s)): | |
n += alphabet[c] * (b ** i) | |
return n | |
p = IntPacking("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz{}") | |
# 'JI' | |
p.to_str(1234) | |
# 1234 | |
p.to_int("JI") | |
# True | |
p(p(12345678)) == 12345678 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
def pack_fns(alphabet): | |
""" | |
Usage:: | |
>>> b64lexi = ( | |
... "0123456789" | |
... "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
... "abcdefghijklmnopqrstuvwxyz" | |
... "{}" | |
... ) | |
>>> to_str, to_int = pack_fns(b64lexi) | |
>>> to_str(1234) | |
'JI' | |
>>> to_int('JI') | |
1234 | |
""" | |
b = len(alphabet) | |
b_divisor = math.log(b, 2) | |
i2c = alphabet | |
c2i = {c: i for (i, c) in enumerate(alphabet)} | |
def to_str(n): | |
i = 0 | |
# unset values in s will be dropped by "".join() | |
s = [""] * (1 + int(n.bit_length() / b_divisor)) | |
while n > 0: | |
n, d = divmod(n, b) | |
s[i] = i2c[d] | |
i += 1 | |
return "".join(reversed(s)) | |
def to_int(s): | |
n = 0 | |
for i, c in enumerate(reversed(s)): | |
n += c2i[c] * (b ** i) | |
return n | |
to_str.base, to_str.alphabet = b, i2c | |
to_int.base, to_int.alphaber = b, i2c | |
return to_str, to_int | |
alphabets = { | |
"hex": "0123456789abcdef", | |
"base64lexi": ( | |
"0123456789" | |
"ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
"abcdefghijklmnopqrstuvwxyz" | |
"{}" | |
) | |
} | |
if __name__ == "__main__": | |
import re, sys | |
cmd, args = sys.argv[0], sys.argv[1:] | |
if len(args) != 3: | |
print("usage: {} ALIAS MODE VALUE".format(cmd)) | |
sys.exit(1) | |
alias, mode, value = args | |
if alias not in alphabets: | |
print(f"error: alias must be one of {list(alphabets.keys())}") | |
sys.exit(1) | |
if mode not in {"e", "d"}: | |
print("error: mode must be 'e' or 'd') | |
sys.exit(1) | |
to_s, to_i = pack_fns(alphabets[alias]) | |
fn = {"e": to_i, "d": to_s}[mode] | |
print(fn(value)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
for example, useful for building composite sort keys in DynamoDb to use in begins_with conditions