Skip to content

Instantly share code, notes, and snippets.

@FeepingCreature
Last active April 26, 2024 08:33
Show Gist options
  • Save FeepingCreature/491763b314b8ac0f9f5411312fb4c752 to your computer and use it in GitHub Desktop.
Save FeepingCreature/491763b314b8ac0f9f5411312fb4c752 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# Helper script that classifies the structure of a JSON object.
# Useful for getting an overview of novel JSON data.
# Created largely by Claude 3 Opus.
import copy
import json
import sys
class JSONType:
    """Base class for every node of the inferred JSON structure.

    Subclasses that carry no state inherit both methods unchanged; container
    subclasses override them to compare and combine their contents.
    """

    def is_similar(self, other):
        """Return True when *other* is an instance of this node's own class."""
        return isinstance(other, self.__class__)

    def merge(self, other):
        """Fold *other* into this node.

        The base node has no state, so merging only checks that the two
        nodes are compatible.

        Raises:
            AssertionError: if *other* is not similar to this node.
        """
        # Raised explicitly instead of using ``assert`` so the check is not
        # stripped when Python runs with -O.
        if not self.is_similar(other):
            raise AssertionError(
                f"cannot merge {other!r} into {self!r}: types are not similar"
            )
class JSONObject(JSONType):
    """Structure node for a JSON object with named fields.

    ``fields`` maps each key to either a single ``JSONType`` node or, once
    several incompatible shapes have been seen for that key, a list of
    alternative ``JSONType`` nodes.
    """

    def __init__(self, fields=None):
        """Create the node from an optional ``{key: JSONType}`` mapping.

        A missing or empty mapping is replaced by a fresh empty dict.
        """
        self.fields = fields or {}
        for field in self.fields.values():
            assert isinstance(field, JSONType)

    def is_similar(self, other):
        """Return True when *other* is a JSONObject with compatible keys.

        Two objects are compatible when every key of the one with fewer keys
        also appears in the other.
        """
        if not isinstance(other, JSONObject):
            return False
        common_keys = self.fields.keys() & other.fields.keys()
        return len(common_keys) >= min(len(self.fields), len(other.fields))

    def merge(self, other):
        """Fold the fields of *other* into this object in place.

        A key present on only one side gains an ``undefined`` alternative.
        Each incoming type is merged into the first similar alternative
        already stored for its key, or appended as a new alternative.
        """
        # Keys we have but *other* lacks. The placeholder goes through the
        # normal merge path so repeated merges do not pile up duplicate
        # ``undefined`` entries.
        for key in self.fields:
            if key not in other.fields:
                self._merge_into_field(key, JSONUndefined())

        for key, value in other.fields.items():
            alternatives = value if isinstance(value, list) else [value]
            if key not in self.fields:
                # Key is new to us, so it was absent from everything merged
                # into this object so far.
                self.add_to_field(key, JSONUndefined())
            for other_type in alternatives:
                self._merge_into_field(key, other_type)

    def _merge_into_field(self, key, new_type):
        """Merge *new_type* into the first similar alternative, else append."""
        current = self.fields[key]
        candidates = current if isinstance(current, list) else [current]
        for candidate in candidates:
            if candidate.is_similar(new_type):
                candidate.merge(new_type)
                return
        self.add_to_field(key, new_type)

    def add_to_field(self, key, type):
        """Record *type* as a further alternative for *key*.

        The first type for a key is stored bare; a second one promotes the
        entry to a list, and later ones are appended to that list.
        """
        if key not in self.fields:
            self.fields[key] = type
        elif isinstance(self.fields[key], list):
            self.fields[key].append(type)
        else:
            self.fields[key] = [self.fields[key], type]

    def __repr__(self):
        fields_str = ', '.join(f"{k} => {v}" for k, v in self.fields.items())
        return f"JSONObject({fields_str})"
class JSONMap(JSONType):
    """Structure node for a JSON object whose values all share one type.

    ``field_type`` is the ``JSONType`` node describing the values, or None
    when no value type has been recorded yet.
    """

    def __init__(self, field_type=None):
        self.field_type = field_type

    def __repr__(self):
        return f"JSONMap(=> {self.field_type})"

    def is_similar(self, other):
        """Return True when *other* is a JSONMap with a compatible value type.

        A map with no recorded value type is treated as compatible with any
        other map, matching how ``merge`` adopts the other side's type.
        """
        if not isinstance(other, JSONMap):
            return False
        # Guard against the default ``field_type=None``; calling
        # ``None.is_similar`` would raise AttributeError.
        if self.field_type is None or other.field_type is None:
            return True
        return self.field_type.is_similar(other.field_type)

    def merge(self, other):
        """Fold the value type of *other* into this map in place."""
        if other.field_type is None:
            # Nothing to contribute.
            return
        if self.field_type is None:
            self.field_type = other.field_type
        else:
            self.field_type.merge(other.field_type)
class JSONArray(JSONType):
    """Structure node for a JSON array.

    ``elements`` holds one node per distinct element shape seen so far.
    """

    def __init__(self, elements=None):
        self.elements = elements if elements else []

    def is_similar(self, other):
        """Any two arrays are similar, regardless of their element types."""
        return isinstance(other, JSONArray)

    def merge(self, other):
        """Fold every element type of *other* into this array in place."""
        for incoming in other.elements:
            for known in self.elements:
                if known.is_similar(incoming):
                    known.merge(incoming)
                    break
            else:
                # No existing shape matched, so record it as a new one.
                self.elements.append(incoming)

    def __repr__(self):
        rendered = ', '.join(map(repr, self.elements))
        return f"JSONArray([{rendered}])"
class JSONString(JSONType):
    """Structure node for a JSON string value."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "string"
class JSONNumber(JSONType):
    """Structure node for a JSON number; base of the integer and float nodes."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "number"
class JSONInteger(JSONNumber):
    """Structure node for a JSON number that was parsed as a Python int."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "integer"
class JSONFloat(JSONNumber):
    """Structure node for a JSON number that was parsed as a Python float."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "float"
class JSONBool(JSONType):
    """Structure node for a JSON boolean value."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "bool"
class JSONNull(JSONType):
    """Structure node for a JSON null value."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "null"
class JSONUndefined(JSONType):
    """Structure node standing in for an absent or unrecognised value."""

    def __repr__(self) -> str:
        # Short label shown when the structure is printed.
        return "undefined"
def extract_json_structure(data):
    """Return the structure node describing a decoded JSON value.

    Dicts become a ``JSONObject``, or a ``JSONMap`` when every value
    collapses to a single shape. Lists become a ``JSONArray`` holding one
    node per distinct element shape. Scalars map to their leaf node, and
    any other Python value yields ``JSONUndefined``.

    Args:
        data: a value as produced by ``json.loads``.

    Returns:
        A ``JSONType`` node describing *data*.
    """
    if isinstance(data, dict):
        obj = JSONObject()
        for key, value in data.items():
            obj.fields[key] = extract_json_structure(value)
        # Is the object actually a map? Merge all value types together and
        # see whether they collapse into one. The probe works on deep copies
        # because merging mutates nodes in place; without the copy, an object
        # that turns out not to be a map would be returned with its field
        # types polluted by its siblings.
        probe = JSONArray()
        for field_type in obj.fields.values():
            probe.merge(JSONArray([copy.deepcopy(field_type)]))
        if len(probe.elements) == 1:
            return JSONMap(probe.elements[0])
        return obj
    if isinstance(data, list):
        arr = JSONArray()
        for item in data:
            arr.merge(JSONArray([extract_json_structure(item)]))
        return arr
    if isinstance(data, str):
        return JSONString()
    # bool must be tested before int: bool is a subclass of int, so the int
    # check would otherwise claim True/False and report them as integers.
    if isinstance(data, bool):
        return JSONBool()
    if isinstance(data, int):
        return JSONInteger()
    if isinstance(data, float):
        return JSONFloat()
    if data is None:
        return JSONNull()
    return JSONUndefined()
# Script body: decode the JSON document on stdin and print its structure.
data = json.load(sys.stdin)
structure = extract_json_structure(data)
print(structure)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment