Created
May 18, 2016 17:56
-
-
Save bshepherdson/c58fb43f9ef6764062313858a96e63ed to your computer and use it in GitHub Desktop.
Pony streaming JSON API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use "collections" | |
// Union of scalar payload types. Nothing in the parser below refers to it.
type JsonValue is (String | I32 | F64 | None)

// Token kinds. Each one is a stateless primitive; the data that goes with a
// token is fetched separately through the parser's get_* methods.
primitive JsonTokenStartArray   // `[` was read.
primitive JsonTokenEndArray     // `]` was reached.
primitive JsonTokenStartObject  // `{` was read.
primitive JsonTokenEndObject    // `}` was reached.
primitive JsonTokenFieldName    // An object key was read.
primitive JsonTokenNull         // The literal null.
primitive JsonTokenFalse        // The literal false.
primitive JsonTokenTrue         // The literal true.
primitive JsonTokenString       // A quoted string value.
primitive JsonTokenInt          // A number with no fraction and no exponent.
primitive JsonTokenFloat        // Any other number.
primitive JsonTokenNothing      // Initial state, and end of the top-level value.

type JsonToken is
  ( JsonTokenStartArray
  | JsonTokenEndArray
  | JsonTokenStartObject
  | JsonTokenEndObject
  | JsonTokenFieldName
  | JsonTokenNull
  | JsonTokenFalse
  | JsonTokenTrue
  | JsonTokenString
  | JsonTokenInt
  | JsonTokenFloat
  | JsonTokenNothing
  )
class JsonParser
  """
  A cursor that walks a byte buffer and presents the JSON in it as a stream
  of tokens, each optionally carrying data.
  Modelled on the Jackson Streaming API.
  """
  // The input bytes and the current read position within them.
  let _array: Array[U8]
  var _index: USize = 0

  // Not read or written by any method in this class.
  var _inArray: Bool = false
  var _inObject: Bool = false

  // Keys of the objects currently being read; the most recent key is at
  // index 0.
  var _keyStack: List[String] = List[String]

  // One entry per open container, innermost at index 0.
  // true = object, false = array.
  var _nestStack: List[Bool] = List[Bool]

  // The token the cursor is currently positioned on.
  var _token: JsonToken = JsonTokenNothing

  // JSON strings may contain escape sequences, so producing the real string
  // means copying it. That copy is put off until a caller actually asks for
  // the string; once built, it is cached here.
  var _tempString: (String val | None) = None

  // Bounds, within _array, of the most recently scanned string.
  var _stringStart: USize = 0
  var _stringEnd: USize = 0

  // Payload of the most recently parsed number token.
  var _tempInt: I32 = 0
  var _tempFloat: F64 = 0

  new create(arr: Array[U8]) =>
    """Creates a parser positioned at the start of `arr`."""
    _array = arr
fun ref next_token(): JsonToken ? =>
  """
  Advances to the next token in the stream, and returns it.

  What gets parsed depends on the token the cursor is currently on:
  - Nothing: any value.
  - FieldName: a ':' followed by any value.
  - StartArray: either ']' (empty array) or the first element.
  - StartObject: either '}' (empty object) or the first field name.
  - Anything else means a complete value (an object, an array, or a simple
    value) has just been finished:
      - inside an array, expect ',' and another value, or ']';
      - inside an object, pop the finished key, then expect ',' and another
        field name, or '}';
      - at the top level, the stream is done and the token becomes Nothing.

  Errors on malformed input or when the buffer ends where more is required.
  """
  match _token
  | JsonTokenNothing => _parse_anything()
  | JsonTokenFieldName =>
    _whitespace()
    _require(':')
    _parse_anything()
  | JsonTokenStartArray =>
    // The array may be empty, so look for ']' before demanding a value.
    _whitespace()
    if _peek() == ']' then
      _close_container(JsonTokenEndArray)
    else
      _parse_anything()
    end
  | JsonTokenStartObject =>
    // The object may be empty; no key has been pushed yet in that case.
    _whitespace()
    if _peek() == '}' then
      _close_container(JsonTokenEndObject)
    else
      _parse_field_name()
    end
  else
    // A complete value was just produced; what follows depends on the
    // innermost open container, if any.
    if _nestStack.size() == 0 then
      // EOF
      _token = JsonTokenNothing
    elseif _nestStack(0) then
      // Object: the value belonging to the most recent key is finished, so
      // that key is dropped before looking for ',' or '}'.
      _keyStack.shift()
      _whitespace()
      match _peek()
      | '}' => _close_container(JsonTokenEndObject)
      | ',' =>
        _consume()
        _parse_field_name()
      else
        error
      end
    else
      // Array: expect ',' or ']'.
      _whitespace()
      match _peek()
      | ']' => _close_container(JsonTokenEndArray)
      | ',' =>
        _consume()
        _parse_anything()
      else
        error
      end
    end
  end
  _token

fun ref _close_container(closing: JsonToken) ? =>
  """
  Steps over the closing bracket under the cursor, makes `closing` the
  current token and pops the innermost entry of the nesting stack.
  The bracket has to be consumed here; otherwise the following call to
  next_token would see the same bracket again.
  """
  _consume()
  _token = closing
  _nestStack.shift()
// Parser internals
fun ref _peek(): U8 ? =>
  """
  Returns the byte under the cursor without moving the cursor.
  Errors when the cursor is past the end of the buffer.
  """
  _array(_index)
fun ref _consume() =>
  """Moves the cursor forward by one byte. No bounds check is made."""
  _index = _index + 1
fun ref _get(): U8 ? =>
  """
  Returns the byte under the cursor and then advances past it.
  Errors, without advancing, when the cursor is past the end of the buffer.
  """
  let byte = _array(_index)
  _index = _index + 1
  byte
fun ref _require(req: U8) ? =>
  """
  Reads one byte and errors unless it equals `req`.
  The byte is consumed even when it does not match.
  """
  if _get() != req then
    error
  end
fun ref _whitespace() ? =>
  """
  Skips over any run of spaces, tabs, carriage returns and newlines.
  Stops on the first other byte, leaving it unread. Errors if the end of the
  buffer is reached first.
  """
  while true do
    let c = _peek()
    if not ((c == ' ') or (c == '\t') or (c == '\r') or (c == '\n')) then
      break
    end
    _consume()
  end
fun ref _parse_field_name() ? =>
  """
  Parses an object key, pushes it onto the key stack and sets the current
  token to JsonTokenFieldName.

  The key is either a quoted string or a bare identifier made of ASCII
  letters, digits, '$' and '_'. Technically the quotes are required in JSON,
  but bare identifiers are tolerated. A bare identifier must contain at least
  one character; anything else (for instance a stray ':' or ',') is an error.
  """
  _whitespace()
  if _peek() == '"' then
    _parse_string() // Sets _stringStart and _stringEnd.
  else
    _stringStart = _index
    while true do
      let c = _peek()
      if ((c >= 'a') and (c <= 'z')) or ((c >= 'A') and (c <= 'Z')) or
        ((c >= '0') and (c <= '9')) or (c == '$') or (c == '_')
      then
        _consume()
      else
        break
      end
    end
    _stringEnd = _index
    // Nothing was scanned, so there is no key here at all.
    if _stringEnd == _stringStart then
      error
    end
  end

  // Now the field name is properly indexed, so we read it in.
  // TODO: This could be lazier, but then it's not as clear where _stringStart
  // and _stringEnd are pointing, if the value is a string etc. etc.
  _read_string()
  _keyStack.unshift(_tempString as String val)
  _token = JsonTokenFieldName
fun ref _parse_anything() ? =>
  """
  Parses the start of whatever value comes next and sets the current token
  accordingly. The kind of value is chosen from its first non-whitespace
  byte:
  - '"' starts a string;
  - '-' or a digit starts a number;
  - '[' / '{' open an array / object and push onto the nesting stack;
  - 'n', 't', 'f' must spell out null, true, false in full.
  Any other byte is an error.
  """
  _whitespace()
  let c = _peek()
  if c == '"' then
    _parse_string()
  elseif (c == '-') or ((c >= '0') and (c <= '9')) then
    _parse_number()
  elseif c == '[' then
    _consume()
    _token = JsonTokenStartArray
    _nestStack.unshift(false)
  elseif c == '{' then
    _consume()
    _token = JsonTokenStartObject
    _nestStack.unshift(true)
  elseif c == 'n' then
    _expect_word("null")
    _token = JsonTokenNull
  elseif c == 't' then
    _expect_word("true")
    _token = JsonTokenTrue
  elseif c == 'f' then
    _expect_word("false")
    _token = JsonTokenFalse
  else
    error
  end

fun ref _expect_word(word: String) ? =>
  """
  Consumes the bytes of `word` one by one, erroring on the first mismatch.
  The whole word is checked, first letter included: the caller has only
  peeked at that letter, so it is still unread at this point.
  """
  for b in word.values() do
    _require(b)
  end
fun ref _parse_string() ? =>
  """
  Scans a quoted string without decoding it.

  On return _stringStart is the index of the first byte after the opening
  quote, _stringEnd is the index of the closing quote, the cursor is just
  past the closing quote, the cached string is cleared, and the current
  token is JsonTokenString. Errors if the cursor is not on a '"' or the
  buffer ends before the closing quote.
  """
  _require('"')
  _tempString = None
  _stringStart = _index

  // True when the previous byte was an unescaped backslash, i.e. the current
  // byte is escaped and a '"' here does not end the string.
  var backslashed: Bool = false
  while true do
    let c = _peek()
    if (not backslashed) and (c == '"') then
      break
    end
    backslashed = (c == '\\') and (not backslashed)
    _consume()
  end

  // The cursor is on the closing quote: record it as the end of the content,
  // then step past it so the next token starts after the string.
  _stringEnd = _index
  _consume()
  _token = JsonTokenString
// Turns the _stringStart/_stringEnd indices into a real String, honouring the
// escape sequences.
fun ref _read_string() ? =>
  """
  Decodes the bytes in [_stringStart, _stringEnd) and stores the result in
  _tempString.

  Recognised escapes are \\ \" \/ \b \f \n \r \t and \uXXXX (exactly four
  hex digits, either case). \xXXXX is accepted as well, because that is the
  only spelling earlier versions of this method understood. Any other escape
  is an error, as is a hex escape whose digits would run past the end of the
  string.
  """
  // Maximum length of the output string is the difference between the
  // endpoints, so we reserve that much space up front.
  var s = recover iso String end
  s.reserve(_stringEnd - _stringStart)

  var i = _stringStart
  while i < _stringEnd do
    var c = _array(i)
    if c == '\\' then
      c = _array(i + 1)
      i = i + 2 // Now just past the escape letter.
      match c
      | '\\' => s.push('\\')
      | '"' => s.push('"')
      | '/' => s.push('/')
      | 'b' => s.push('\b')
      | 'f' => s.push('\f')
      | 'n' => s.push('\n')
      | 'r' => s.push('\r')
      | 't' => s.push('\t')
      else
        if (c != 'u') and (c != 'x') then
          error
        end
        // All four digits must lie inside the string.
        if (i + 4) > _stringEnd then
          error
        end
        var code: U32 = 0
        for j in Range(0, 4) do
          code = (code << 4) or _hex_digit(_array(i + j))
        end
        // Append the code point's UTF-8 encoding byte by byte.
        let encoded = String.from_utf32(code)
        for b in encoded.values() do s.push(b) end
        i = i + 4 // Add four more to the index for the hex digits.
      end
    else
      s.push(c)
      i = i + 1
    end
  end
  _tempString = consume s
fun _hex_digit(x: U8): U32 ? =>
  """
  Converts one ASCII hex digit ('0'-'9', 'a'-'f' or 'A'-'F') to its value
  0-15. Errors on any other byte.
  """
  if (x >= '0') and (x <= '9') then
    (x - '0').u32()
  elseif (x >= 'a') and (x <= 'f') then
    // 'a' stands for ten, not zero.
    (x - 'a').u32() + 10
  elseif (x >= 'A') and (x <= 'F') then
    (x - 'A').u32() + 10
  else
    error
  end
fun ref _parse_number() ? =>
  """
  Parses a JSON number starting at the cursor: an optional '-', an integer
  part, an optional '.fraction' and an optional exponent introduced by 'e'
  or 'E' with an optional sign.

  A number with neither fraction nor exponent becomes JsonTokenInt with its
  value in _tempInt; anything else becomes JsonTokenFloat with its value in
  _tempFloat. The number may be the last thing in the buffer. Errors when a
  required digit is missing.
  """
  // TODO: Should be possible to parse numbers lazily, using the indices.
  // That would be marginally more efficient.
  var negative = false
  if _peek() == '-' then
    negative = true
    _consume()
  end

  var frac: I32 = 0
  var frac_digits: U8 = 0
  var exp: I32 = 0
  var exp_digits: U8 = 0

  // Start with the integer part.
  (let int, _) = _parse_decimal()

  // From here on the buffer may legitimately be exhausted, so the look-ahead
  // must not error at the end of input.
  if _peek_or_nul() == '.' then
    _consume()
    (frac, frac_digits) = _parse_decimal()
  end

  // Setting bit 0x20 folds 'E' onto 'e'.
  if (_peek_or_nul() or 0x20) == 'e' then
    // Exponential part
    _consume()
    var neg_exp = false
    let sign = _peek()
    if sign == '-' then
      _consume()
      neg_exp = true
    elseif sign == '+' then
      _consume()
    end
    (exp, exp_digits) = _parse_decimal()
    if neg_exp then
      exp = -exp
    end
  end

  if (frac_digits == 0) and (exp_digits == 0) then
    // Integer
    _token = JsonTokenInt
    _tempInt = if negative then -int else int end
  else
    // Otherwise we've got a floating point value.
    let f = (int.f64() + (frac.f64() / F64(10).pow(frac_digits.f64()))) *
      (F64(10).pow(exp.f64()))
    _token = JsonTokenFloat
    _tempFloat = if negative then -f else f end
  end

fun ref _parse_decimal(): (I32 /* value */, U8 /* digit count */) ? =>
  """
  Reads a run of ASCII digits and returns its value together with how many
  digits were read. Stops at the first non-digit or at the end of the
  buffer. Errors if there is not at least one digit.
  NOTE(review): the value is accumulated in an I32 and the count in a U8, so
  very long digit runs wrap around - confirm whether that matters to callers.
  """
  var value: I32 = 0
  var digit_count: U8 = 0
  while true do
    let c = _peek_or_nul()
    if (c < '0') or (c > '9') then
      break
    end
    _consume()
    value = (value * 10) + (c - '0').i32()
    digit_count = digit_count + 1
  end
  if digit_count == 0 then
    error
  end
  (value, digit_count)

fun _peek_or_nul(): U8 =>
  """
  Returns the byte under the cursor, or 0 when the cursor is past the end of
  the buffer. Used where running out of input is acceptable.
  """
  try _array(_index) else U8(0) end
fun get_bool(): Bool ? =>
  """
  Yields true for a JsonTokenTrue and false for a JsonTokenFalse.
  The current token being anything else is an error.
  """
  match _token
  | JsonTokenTrue => true
  | JsonTokenFalse => false
  else
    error
  end
fun get_int(): I32 ? =>
  """
  Yields the current number as an integer. A JsonTokenFloat is converted
  with F64.i32(). Any non-numeric token is an error.
  """
  if _token is JsonTokenInt then
    _tempInt
  elseif _token is JsonTokenFloat then
    _tempFloat.i32()
  else
    error
  end
fun get_float(): F64 ? =>
  """
  Yields the current number as a float. A JsonTokenInt is converted with
  I32.f64(). Any non-numeric token is an error.
  """
  if _token is JsonTokenFloat then
    _tempFloat
  elseif _token is JsonTokenInt then
    _tempInt.f64()
  else
    error
  end
fun ref get_string(): String val ? =>
  """
  Returns the decoded text of the current token.
  This works on JsonTokenFieldName and JsonTokenString tokens; any other
  token is an error, matching the other get_* methods. The string is decoded
  on first request and cached until the next string is scanned.
  """
  if not ((_token is JsonTokenString) or (_token is JsonTokenFieldName)) then
    error
  end
  // Identity test: the field is a (String val | None) union, which cannot be
  // compared with ==.
  if _tempString is None then
    _read_string()
  end
  // Narrow the union to the declared return type.
  _tempString as String val
fun is_null(): Bool =>
  """True when the current token is JsonTokenNull."""
  _token is JsonTokenNull
fun eof(): Bool =>
  """
  True when the current token is JsonTokenNothing, which is the case both
  before the first token and after the top-level value has ended.
  """
  _token is JsonTokenNothing
fun field(): String ? =>
  """
  Returns the most recent object key.
  Will error if we're not under any keys.
  """
  _keyStack(0)
fun token(): JsonToken =>
  """Returns the token the cursor is currently on, without advancing."""
  _token
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
interface FromJSON
  """
  Something that can be created empty and then filled in from a JsonParser.
  """
  new create()
    """Creates an instance with no data read into it yet."""

  fun ref from_json(parser: JsonParser) ?
    """
    Populates this instance by pulling tokens from `parser`. May error.
    NOTE(review): where the cursor must be on entry and where it is left on
    return is not stated here - confirm against the implementations.
    """
primitive JsonLoader
  """
  Helpers for loading a file that holds a JSON array of objects.
  """
  fun deserialize_file[A: FromJSON val](fp : FilePath) : Array[A] val =>
    """
    Reads a JSON file containing an array, and converts each to an A.
    Returns an empty array when the file cannot be opened or its contents
    fail to parse.
    """
    try
      with f = OpenFile(fp) as File do
        // The freshly read buffer goes straight into the parser. Binding it
        // to a ref local first and then naming that local inside a recover
        // expression is rejected by the compiler, and no recover is needed:
        // _deserialize_array takes a plain JsonParser.
        _deserialize_array[A](JsonParser(f.read(f.size())))
      end
    else
      recover val Array[A] end
    end
fun _deserialize_array[A: FromJSON val](p : JsonParser) : Array[A] val ? =>
  """
  Expects `p` to yield JsonTokenStartArray first, then builds one A for
  every JsonTokenStartObject that follows. Stops at the first token that is
  not a JsonTokenStartObject and returns what has been collected. Errors
  when the first token is not the start of an array, or when the parser or
  from_json errors.
  NOTE(review): the token that ends the loop is not checked, so anything
  other than an object ends the array silently - confirm that is intended.
  NOTE(review): the element is built as a FromJSON iso and pushed into an
  Array[A]; verify that this type-checks with the ponyc version in use.
  """
  var collected : Array[A] iso = recover Array[A]() end
  let first = p.next_token()
  if first isnt JsonTokenStartArray then
    error
  end
  while p.next_token() is JsonTokenStartObject do
    var element: FromJSON iso = A.create()
    element.from_json(p)
    collected.push(consume element)
  end
  consume collected
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment