Last active
July 16, 2024 09:25
-
-
Save DavidBuchanan314/e2d84c50cbd8e7c86eaa25f0c5b29a5c to your computer and use it in GitHub Desktop.
I wrote about this code in more detail here: https://www.da.vidbuchanan.co.uk/blog/signing-json.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
DISCLAIMER: This is a quick prototype; it is not tested at all and may be deeply cryptographically flawed.
Normally, JSON canonicalization is at least O(n log n), because you need to sort the map keys.
This approach avoids the need to do that, and in theory it's O(n), but in practice it's probably slower for most inputs... I have not benchmarked.
If you limit recursion depth, you could implement it as an online algorithm: https://en.wikipedia.org/wiki/Online_algorithm
NB: Python's JSON parser allows duplicate map keys, which this impl will be oblivious to.
(Edit: I'm now using object_pairs_hook to forbid duplicate keys)
hash_number() is probably a bit half-baked too.
"""
import json | |
import struct | |
import hashlib | |
from typing import Dict, Any | |
import nacl.bindings.crypto_core # install via: python3 -m pip install git+https://github.com/pyca/pynacl | |
hash256 = hashlib.sha256 | |
def hash_str(value: str) -> bytes:
    """Return the SHA-256 digest of a JSON string.

    The digest covers the tag byte ``b"s"`` followed by the UTF-8 encoding
    of *value*; the tag keeps string digests apart from the other JSON types.
    """
    hasher = hash256(b"s")
    hasher.update(value.encode("utf-8"))
    return hasher.digest()
def hash_number(value: int | float) -> bytes: | |
return hash256(b"n" + struct.pack("<d", value)).digest() | |
def hash_value(value: Any) -> bytes:
    """Hash any decoded JSON value by dispatching on its exact type.

    Strings, numbers, lists and dicts are delegated to ``hash_str``,
    ``hash_number``, ``hash_array`` and ``hash_map``. Booleans and ``None``
    are hashed here as the single tag bytes ``b"t"``/``b"f"`` and ``b"u"``.

    The checks use ``type(value)`` rather than ``isinstance`` so that
    ``bool`` (a subclass of ``int``) is never hashed as a number.

    Raises:
        TypeError: if *value* is not one of the supported types.
    """
    if value is None:
        return hash256(b"u").digest()  # u for undefined
    kind = type(value)
    if kind is bool:
        return hash256(b"t" if value else b"f").digest()
    if kind is str:
        return hash_str(value)
    if kind is int or kind is float:
        return hash_number(value)
    if kind is list:
        return hash_array(value)
    if kind is dict:
        return hash_map(value)
    # TypeError is a subclass of Exception, so callers that caught the
    # previous bare Exception keep working.
    raise TypeError(f"dunno how to hash a {kind}")
def hash_array(value: list) -> bytes:
    """Return the SHA-256 digest of a JSON array.

    The digest covers the tag byte ``b"a"`` followed by the digests of the
    elements in list order, so the result depends on element order.
    """
    element_digests = b"".join(hash_value(element) for element in value)
    return hash256(b"a" + element_digests).digest()
def hash_map(value: Dict[str, Any]) -> bytes:
    """Return an order-independent SHA-256 digest of a JSON object.

    Each key/value pair is hashed, mapped to an Ed25519 point with
    ``crypto_core_ed25519_from_uniform``, and added into a running sum with
    ``crypto_core_ed25519_add``. The final digest covers the tag byte
    ``b"m"`` followed by the 32-byte encoded sum, so no key sorting is needed.
    """
    # 0x01 followed by 31 zero bytes: the running sum before any entry is
    # added (the standard encoding of the Ed25519 identity point).
    running_sum = b"\x01" + b"\x00" * 31
    for key, item in value.items():
        entry_digest = hash256(hash_str(key) + hash_value(item)).digest()
        entry_point = nacl.bindings.crypto_core.crypto_core_ed25519_from_uniform(entry_digest)
        running_sum = nacl.bindings.crypto_core.crypto_core_ed25519_add(running_sum, entry_point)
    return hash256(b"m" + running_sum).digest()
def ensure_no_duplicate_keys(object_pairs: list[tuple[str, Any]]) -> dict:
    """Build a dict from decoded JSON object pairs, rejecting repeated keys.

    Meant to be passed as ``object_pairs_hook`` to ``json.loads``, which
    calls it with the list of ``(key, value)`` pairs of each decoded object.

    Raises:
        ValueError: if the same key appears more than once in *object_pairs*.
    """
    value = dict(object_pairs)
    # dict() keeps only the last value for a repeated key, so a dict shorter
    # than the pair list means at least one key was duplicated.
    if len(value) != len(object_pairs):
        raise ValueError("Duplicate JSON map keys")
    return value
def hash_json(data: str) -> bytes:
    """Parse the JSON text *data* and return the digest of the decoded value.

    Objects are decoded through ``ensure_no_duplicate_keys``, so a document
    with a repeated map key raises ``ValueError`` instead of being hashed.
    """
    parsed = json.loads(data, object_pairs_hook=ensure_no_duplicate_keys)
    return hash_value(parsed)
if __name__ == "__main__":
    # Demo: print the digest of a small document.
    print(hash_json('{"hello": "world"}').hex())

    # Each group holds the same object written with a different key order
    # and whitespace; every member of a group must hash identically.
    equivalent_documents = [
        [
            '{"a": "a", "b": "b"}',
            '{ "b": "b", "a": "a"}',
        ],
        [
            '{"b": "b", "a": "a", "c": "c"}',
            '{"c": "c", "b": "b", "a": "a"}',
            '{"a" : "a", "b": "b", "c": "c"}',
        ],
    ]
    for group in equivalent_documents:
        distinct_digests = {hash_json(document) for document in group}
        assert len(distinct_digests) == 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment