Last active
July 2, 2016 07:59
-
-
Save ls0f/7740715b45564488f9f68aa01bd9f011 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:utf-8 | |
import re | |
class Parse(object):
    """Small recursive-descent JSON parser.

    Call ``parse(text)``; the resulting Python value (a ``list`` or a
    ``dict``) is stored in ``self.js``.

    ``self.i`` is the index of the last consumed character (``-1`` before
    anything has been read), so the next unread character is always
    ``self.text[self.i + 1]``.

    String values and keys are returned exactly as they appear between the
    quotes in the input: escape sequences are validated but not decoded.
    Numbers become ``int`` or ``float``; ``true``/``false``/``null`` become
    ``True``/``False``/``None``.

    All syntax errors are reported by raising ``Exception``.
    """

    def __init__(self):
        self.i = -1       # index of the last consumed character
        self.text = ""    # input being parsed
        self.js = None    # result of the last successful parse()

    def next_char(self):
        """Return the next unread character, or '' at end of input."""
        # Slicing (instead of indexing) never raises IndexError.
        return self.text[self.i + 1: self.i + 2]

    def next_next_char(self):
        """Return the character after the next one, or '' if there is none."""
        return self.text[self.i + 2: self.i + 3]

    def _context(self):
        """Return up to 20 characters of input around the current position."""
        start = max(self.i, 0)
        return self.text[start: start + 20]

    def consume(self, l=1):
        """Consume and return the next ``l`` characters.

        Raises ``Exception`` when fewer than ``l`` characters remain.
        """
        if self.i + l >= len(self.text):
            raise Exception(
                "unexpected end of input, nearly '%s' " % (self._context(),))
        t = self.text[self.i + 1: self.i + l + 1]
        self.i += l
        return t

    def accept(self, c):
        """Consume the next character, which must equal ``c``; return True."""
        if self.next_char() != c:
            self.err()
        self.consume()
        return True

    def accept_multi(self, multi):
        """Consume each character of ``multi`` in order (e.g. a keyword)."""
        for c in multi:
            self.accept(c)

    def accept_regexp(self, l, r):
        """Consume ``l`` characters that must match the regexp ``r``.

        Returns True on success and raises ``Exception`` otherwise.
        """
        start = self.i
        multi = self.consume(l)
        if re.search(r, multi) is None:
            # Rewind so the error message points at the offending text.
            self.i = start
            self.err()
        return True

    def consume_blank(self):
        """Skip any run of whitespace; stops quietly at end of input."""
        while re.search(r"\s", self.next_char()):
            self.consume()

    def err(self):
        """Raise ``Exception`` describing the unexpected next character."""
        c = self.next_char()
        if c == '':
            raise Exception(
                "unexpected end of input, nearly '%s' " % (self._context(),))
        raise Exception(
            "unexpected char '%s', nearly '%s' " % (c, self._context()))

    def parse(self, text):
        """Parse ``text`` and store the result in ``self.js``.

        The top-level value must be an array or an object.
        """
        self.text = text
        self.i = -1
        self.consume_blank()
        if self.next_char() == '[':
            self.js = self.parse_array()
        elif self.next_char() == '{':
            self.js = self.parse_object()
        else:
            self.err()

    def parse_object(self):
        """Parse ``{ "key": value, ... }`` and return it as a dict."""
        d = dict()
        self.consume_blank()
        self.accept('{')
        self.consume_blank()
        if self.next_char() == '"':
            k, v = self.parse_pair()
            d[k] = v
            while self.next_char() == ',':
                self.accept(',')
                k, v = self.parse_pair()
                d[k] = v
        self.accept('}')
        return d

    def parse_pair(self):
        """Parse one ``"key": value`` member and return ``(key, value)``."""
        self.consume_blank()
        k = self.parse_string()
        self.accept(":")
        v = self.parse_value()
        self.consume_blank()
        return k, v

    def parse_string(self):
        """Parse a quoted string and return the raw text between the quotes.

        Escape sequences are checked for validity but left undecoded.
        """
        self.accept('"')
        s = self.i
        while self.next_char() != '"':
            c = self.next_char()
            if c == '':
                # Unterminated string: report it instead of running off the end.
                self.err()
            if c == '\\':
                self.consume()
                if self.next_char() == 'u':
                    self.parse_unicode()
                elif self.next_char() in ('\\', '"'):
                    self.consume()
                else:
                    self.accept_regexp(1, r'[/bfnrt]')
            else:
                self.consume()
        e = self.i
        self.accept('"')
        self.consume_blank()
        return self.text[s + 1: e + 1]

    def parse_value(self):
        """Parse any JSON value and return its Python equivalent."""
        self.consume_blank()
        c = self.next_char()
        if c == '"':
            v = self.parse_string()
        elif re.search(r"[0-9-]", c):
            # '0' must be accepted too, otherwise 0 and 0.5 are rejected.
            v = self.parse_number()
        elif c == '{':
            v = self.parse_object()
        elif c == '[':
            v = self.parse_array()
        elif c == 't':
            self.accept_multi("true")
            v = True
        elif c == 'f':
            self.accept_multi("false")
            v = False
        elif c == 'n':
            self.accept_multi("null")
            v = None
        else:
            self.err()
        self.consume_blank()
        return v

    def parse_array(self):
        """Parse ``[ value, ... ]`` and return it as a list."""
        l = []
        self.accept('[')
        self.consume_blank()
        # A single pass only: anything left after the element list other
        # than ']' is a syntax error caught by accept() below.
        if self.next_char() != ']':
            l = self.parse_element()
        self.accept(']')
        self.consume_blank()
        return l

    def parse_element(self):
        """Parse one or more comma-separated values and return them as a list."""
        ele = [self.parse_value()]
        while self.next_char() == ',':
            self.accept(',')
            ele.append(self.parse_value())
        self.consume_blank()
        return ele

    def parse_unicode(self):
        """Validate the ``uXXXX`` part of a unicode escape (four hex digits)."""
        self.accept('u')
        self.accept_regexp(4, r"[0-9a-fA-F]{4}")

    def parse_number(self):
        """Parse a number; return float if it has a fraction/exponent, else int."""
        s = self.i
        self.parse_int()
        if self.next_char() == '.':
            self.parse_frac()
        if self.next_char() in ('e', "E"):
            self.parse_exp()
        e = self.i
        t = self.text[s + 1: e + 1]
        # The exponent marker may be upper- or lower-case.
        if 'e' in t or 'E' in t or '.' in t:
            return float(t)
        else:
            return int(t)

    def parse_int(self):
        """Consume an optional minus sign followed by digits."""
        if self.next_char() == '-':
            self.consume()
        self.parse_digits()

    def parse_frac(self):
        """Consume a '.' followed by digits."""
        self.accept('.')
        self.parse_digits()

    def parse_exp(self):
        """Consume 'e' or 'E', an optional sign, and digits."""
        self.accept_regexp(1, r'e|E')
        if self.next_char() in ('+', '-'):
            self.consume()
        self.parse_digits()

    def parse_digits(self):
        """Consume one or more ASCII digits."""
        self.accept_regexp(1, '[0-9]')
        # Explicit range check: str.isdigit() also accepts non-ASCII digits
        # that int()/float() may reject, and '' compares False here.
        while '0' <= self.next_char() <= '9':
            self.consume()
if __name__ == "__main__":
    # Running this module directly is a no-op.
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding:utf-8 | |
import sys | |
import parse | |
class Pretty(object):
    """Pretty-print a parsed JSON value (lists, dicts, scalars) to stdout.

    Output is written with ``sys.stdout.write`` so the class behaves the
    same on Python 2 and Python 3. Strings and keys are emitted between
    double quotes verbatim; no escaping is applied.
    """

    # Both text types on Python 2 (str, unicode); just str on Python 3.
    _string_types = (str, type(u""))

    def __init__(self):
        self.buffer = []          # pending [depth, text] lines, see clear_buffer()
        self.indent = 4 * ' '     # one indentation level
        self.on_line = False      # True while a key has been written without newline

    def pretty(self, js):
        """Print ``js`` starting at indentation depth 0."""
        self.buffer = []
        self.pretty_json(js, 0)
        self.clear_buffer()

    def ptr(self, d, str, newlines=True):
        """Write ``str`` at depth ``d``.

        With ``newlines=False`` the text is written indented and the line is
        left open. The next call then continues that line (separated by one
        space, without indentation) and terminates it.
        """
        if newlines is False:
            sys.stdout.write("%s%s" % (self.indent * d, str))
            self.on_line = True
        elif self.on_line:
            sys.stdout.write(" %s\n" % (str,))
            self.on_line = False
        else:
            sys.stdout.write("%s%s\n" % (self.indent * d, str))

    def pretty_json(self, js, d=0, comma=False):
        """Print any value at depth ``d``; append ',' when ``comma`` is true."""
        suffix = ',' if comma else ''
        if isinstance(js, list):
            self.pretty_list(js, d, comma)
        elif isinstance(js, dict):
            self.pretty_map(js, d, comma)
        elif isinstance(js, self._string_types):
            self.ptr(d, '"%s"%s' % (js, suffix))
        elif js is None:
            self.ptr(d, '%s%s' % ("null", suffix))
        elif isinstance(js, bool):
            # Must be tested before the generic branch: bool is an int subclass.
            self.ptr(d, '%s%s' % ("true" if js else "false", suffix))
        else:
            self.ptr(d, '%s%s' % (js, suffix))

    def pretty_list(self, ls, d, comma=False):
        """Print list ``ls`` at depth ``d``, one element per line."""
        self.ptr(d, "[")
        last = len(ls) - 1
        for i, js in enumerate(ls):
            self.pretty_json(js, d + 1, i != last)
        # Close the bracket right after its own elements so nesting stays
        # correct, and carry the separator for the enclosing container.
        self.ptr(d, "]," if comma else "]")

    def pretty_map(self, map, d, comma=False):
        """Print dict ``map`` at depth ``d``, one ``"key": value`` per line."""
        self.ptr(d, "{")
        last = len(map) - 1
        for i, (k, v) in enumerate(map.items()):
            self.pretty_key(k, d + 1)
            self.pretty_json(v, d + 1, i != last)
        self.ptr(d, "}," if comma else "}")

    def pretty_key(self, k, d):
        """Write ``"k":`` at depth ``d`` and leave the line open for the value."""
        self.ptr(d, '"%s":' % (k, ), False)

    def clear_buffer(self):
        """Write out any lines still queued in ``self.buffer``, last one first."""
        while self.buffer:
            d, p = self.buffer.pop(-1)
            self.ptr(d, "%s" % (p, ))
def test():
    """Pretty-print two nested sample documents as a manual smoke test."""
    samples = (
        {"a": 1, "b": [1, 2, "c", {"f": {"a": "1", "b": [1, 2, 3]}}]},
        [{"a": 1, "b": 3}, 3, 4, [[[[1, 2, 3]]]]],
    )
    for sample in samples:
        Pretty().pretty(sample)
if __name__ == "__main__":
    # Parse the JSON document read from stdin and pretty-print the result.
    parser = parse.Parse()
    parser.parse(sys.stdin.read())
    Pretty().pretty(parser.js)
Author
ls0f
commented
Jul 1, 2016
curl https://raw.githubusercontent.com/antlr/grammars-v4/master/json/examples/example1.json 2>/dev/null | python prettyjson.py
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment