Skip to content

Instantly share code, notes, and snippets.

@bshepherdson
Created May 18, 2016 17:56
Show Gist options
  • Save bshepherdson/c58fb43f9ef6764062313858a96e63ed to your computer and use it in GitHub Desktop.
Save bshepherdson/c58fb43f9ef6764062313858a96e63ed to your computer and use it in GitHub Desktop.
Pony streaming JSON API
use "collections"
use "files"
// Union of String, I32, F64 and None.
// NOTE(review): nothing in the code visible here refers to this alias -
// confirm whether it is still needed.
type JsonValue is (String | I32 | F64 | None)
// Token kinds reported by JsonParser. Each is a stateless marker; any payload
// (string, number) is read separately through the parser's get_* accessors.

// A '[' was read.
primitive JsonTokenStartArray
// A ']' was read.
primitive JsonTokenEndArray
// A '{' was read.
primitive JsonTokenStartObject
// A '}' was read.
primitive JsonTokenEndObject
// An object key was read; the ':' after it has not been read yet.
primitive JsonTokenFieldName
// The literal null.
primitive JsonTokenNull
// The literal false.
primitive JsonTokenFalse
// The literal true.
primitive JsonTokenTrue
// A string value.
primitive JsonTokenString
// A number without a fraction or exponent part.
primitive JsonTokenInt
// A number read as a floating point value.
primitive JsonTokenFloat
// No current token: the parser's initial state, and what it reports once the
// outermost value has been completed.
primitive JsonTokenNothing
// Every kind of token JsonParser.next_token() can return.
type JsonToken is (JsonTokenStartArray | JsonTokenEndArray |
JsonTokenStartObject | JsonTokenEndObject | JsonTokenFieldName |
JsonTokenNull | JsonTokenFalse | JsonTokenTrue | JsonTokenString |
JsonTokenInt | JsonTokenFloat | JsonTokenNothing)
class JsonParser
"""
A moving cursor that exposes JSON data as a stream of tokens with data.
Based on the Jackson Streaming API.

Call next_token() to advance, then use the get_* accessors to read the
payload that belongs to the current token.
"""
// The raw input bytes, and the cursor position within them.
let _array: Array[U8]
var _index: USize = 0
// NOTE(review): _inArray and _inObject are never read or written by the code
// visible here - confirm whether they can be removed.
var _inArray: Bool = false
var _inObject: Bool = false
// Keys of the fields currently being parsed, most recent key at the head.
var _keyStack: List[String] = List[String]
// One entry per open container, innermost at the head.
// True means object, False means array.
var _nestStack: List[Bool] = List[Bool]
// The token most recently produced; drives what next_token() expects next.
var _token: JsonToken = JsonTokenNothing
// Since JSON strings contain escape sequences, they have to be copied to
// really get the string.
// For speed, however, that process is deferred until as late as possible -
// right before returning the string to the user.
// If we are forced to build a string for the user, it gets cached here.
var _tempString: (String val | None) = None
// Position of the last string we scanned: _stringStart is the index of its
// first byte and _stringEnd is the index just past its last byte (quotes are
// not included).
var _stringStart: USize = 0
var _stringEnd: USize = 0
// Whenever a value of this type is parsed as the last token, the appropriate
// field here is set.
var _tempInt: I32 = 0
var _tempFloat: F64 = 0
// Creates a parser over `arr`. The array is stored as given (no copy is made)
// and scanning starts at index 0 with no current token.
new create(arr: Array[U8]) =>
_array = arr
fun ref next_token(): JsonToken ? =>
  """
  Advances to the next token in the stream, and returns it.

  Returns JsonTokenNothing once the outermost value has been completed.
  Errors when the input does not match what is expected at the current
  position, or when the input ends in the middle of a value.
  """
  // We decide what to parse next based on the previous token.
  // The tricky bit is what to do when we finish a complete value (an object,
  // an array, or a simple value).
  // - If we're inside an array, expect either a comma or ]
  // - If we're inside an object, pop the last key, expect a comma and another
  //   key-value pair, or a }
  // - For a simple value, we're done.
  match _token
  | JsonTokenNothing => _parse_anything()
  | JsonTokenFieldName =>
    _whitespace()
    _require(':')
    _parse_anything()
  | JsonTokenStartArray =>
    // An array may be empty, so look for ] before insisting on a value.
    _whitespace()
    if _peek() == ']' then
      _consume()
      _token = JsonTokenEndArray
      _nestStack.shift()
    else
      _parse_anything()
    end
  | JsonTokenStartObject =>
    // Likewise an object may be empty. No key was pushed for it, so there is
    // nothing to pop from _keyStack here.
    _whitespace()
    if _peek() == '}' then
      _consume()
      _token = JsonTokenEndObject
      _nestStack.shift()
    else
      _parse_field_name()
    end
  else
    // We've just finished a complete value. Let's check whether we were
    // nested inside anything.
    if _nestStack.size() > 0 then
      if _nestStack(0) then
        // Object, so expect either , or }.
        // Also, remove the last key from the keyStack.
        _keyStack.shift()
        _whitespace()
        match _peek()
        | '}' =>
          // _peek() does not move the cursor; step over the brace so the
          // next call starts after it.
          _consume()
          _token = JsonTokenEndObject
          _nestStack.shift()
        | ',' =>
          _consume()
          _parse_field_name()
        else
          error
        end
      else
        // Array, so expect either , or ]
        _whitespace()
        match _peek()
        | ']' =>
          // Step over the bracket, for the same reason as above.
          _consume()
          _token = JsonTokenEndArray
          _nestStack.shift()
        | ',' =>
          _consume()
          _parse_anything()
        else
          error
        end
      end
    else
      // EOF
      _token = JsonTokenNothing
    end
  end
  _token
// Parser internals
// Returns the byte under the cursor without moving the cursor. Partial: the
// array lookup errors when the cursor is outside the input.
fun ref _peek(): U8 ? => _array(_index)
// Moves the cursor forward by one byte. No bounds check happens here.
fun ref _consume() => _index = _index + 1
fun ref _get(): U8 ? =>
  """
  Returns the byte under the cursor and then moves the cursor past it.
  Errors (leaving the cursor where it was) when the cursor is outside the
  input.
  """
  let byte = _array(_index)
  _index = _index + 1
  byte
fun ref _require(req: U8) ? =>
  """
  Reads one byte and errors unless it equals `req`. The byte is consumed
  even when it does not match.
  """
  if _get() != req then
    error
  end
fun ref _whitespace() ? =>
  """
  Skips over any run of spaces, tabs, carriage returns and newlines, leaving
  the cursor on the first byte that is none of those. Errors if the input
  ends before such a byte is found.
  """
  var next = _peek()
  while (next == ' ') or (next == '\t') or (next == '\r') or (next == '\n') do
    _consume()
    next = _peek()
  end
fun ref _parse_field_name() ? =>
  """
  Reads an object key, pushes it onto _keyStack and makes
  JsonTokenFieldName the current token.

  The key is either a quoted string or a bare run of letters, digits, '$'
  and '_'. Strict JSON requires the quotes; bare names are tolerated.
  """
  _whitespace()
  if _peek() == '"' then
    // Quoted form: _parse_string records _stringStart and _stringEnd.
    _parse_string()
  else
    // Bare form: the name runs from here to the first non-identifier byte.
    _stringStart = _index
    var scanning = true
    while scanning do
      let ch = _peek()
      let is_letter =
        ((ch >= 'a') and (ch <= 'z')) or ((ch >= 'A') and (ch <= 'Z'))
      let is_digit = (ch >= '0') and (ch <= '9')
      if is_letter or is_digit or (ch == '$') or (ch == '_') then
        _consume()
      else
        scanning = false
      end
    end
    _stringEnd = _index
  end
  // The name is decoded eagerly. Doing it lazily would leave it unclear what
  // _stringStart and _stringEnd refer to once the field's value has been
  // scanned.
  _read_string()
  _keyStack.unshift(_tempString as String val)
  _token = JsonTokenFieldName
fun ref _parse_anything() ? =>
  """
  Parses the beginning of whatever value comes next and updates _token.

  Strings and numbers are scanned completely. For arrays and objects only
  the opening bracket is read, and the container is pushed onto _nestStack.
  Errors on any byte that cannot start a JSON value.
  """
  _whitespace()
  let c = _peek()
  if c == '"' then
    _parse_string()
  elseif (c == '-') or ((c >= '0') and (c <= '9')) then
    _parse_number()
  elseif c == '[' then
    _consume()
    _token = JsonTokenStartArray
    _nestStack.unshift(false)
  elseif c == '{' then
    _consume()
    _token = JsonTokenStartObject
    _nestStack.unshift(true)
  elseif c == 'n' then
    // c was only peeked, so the literal's first byte still has to be read
    // before checking the rest of it.
    _require('n')
    _require('u')
    _require('l')
    _require('l')
    _token = JsonTokenNull
  elseif c == 't' then
    _require('t')
    _require('r')
    _require('u')
    _require('e')
    _token = JsonTokenTrue
  elseif c == 'f' then
    _require('f')
    _require('a')
    _require('l')
    _require('s')
    _require('e')
    _token = JsonTokenFalse
  else
    error
  end
fun ref _parse_string() ? =>
  """
  Scans a quoted string starting at the opening quote and makes
  JsonTokenString the current token.

  Only the extent of the string is recorded (_stringStart and _stringEnd, not
  including the quotes); escape sequences are decoded later by
  _read_string(). On return the cursor is just past the closing quote.
  Errors if the opening quote is missing or the input ends before the
  closing quote.
  """
  _require('"')
  // Any previously decoded string no longer matches the indices.
  _tempString = None
  _stringStart = _index
  var backslashed: Bool = false
  while true do
    let c = _peek()
    if (not backslashed) and (c == '"') then
      break
    end
    // A backslash escapes the next byte, unless it was itself escaped.
    backslashed = (c == '\\') and (not backslashed)
    _consume()
  end
  // Aimed at the quote mark.
  _stringEnd = _index
  // Step over the closing quote so that whatever follows the string
  // (':', ',', '}' or ']') is what the caller sees next.
  _consume()
  _token = JsonTokenString
// Turns the _stringStart/_stringEnd indices into a real String, honouring the
// escape sequences, and caches the result in _tempString.
// Errors on an unknown escape sequence or a malformed hex digit.
fun ref _read_string() ? =>
  // Maximum length of the output string is the difference between the
  // endpoints, so we reserve that much space up front.
  var s = recover iso String end
  s.reserve(_stringEnd - _stringStart)
  var i = _stringStart
  while i < _stringEnd do
    var c = _array(i)
    if c == '\\' then
      // c becomes the byte that selects the escape; i moves past both bytes.
      c = _array(i + 1)
      i = i + 2
      match c
      | '\\' => s.push('\\')
      | '"' => s.push('"')
      | '/' => s.push('/')
      | 'b' => s.push('\b')
      | 'f' => s.push('\f')
      | 'n' => s.push('\n')
      | 'r' => s.push('\r')
      | 't' => s.push('\t')
      else
        // The only escape left is a code point written as exactly 4 hex
        // digits (upper- and lowercase allowed). JSON spells it \uXXXX;
        // \xXXXX is also accepted because that is the spelling this parser
        // originally recognised.
        if (c != 'u') and (c != 'x') then
          error
        end
        var x: U32 = 0
        for j in Range(0, 4) do
          x = (x << 4) or _hex_digit(_array(i + j))
        end
        // NOTE(review): each escape is converted on its own, so a UTF-16
        // surrogate pair written as two escapes is not combined - confirm
        // whether that matters for the expected input.
        let hex = String.from_utf32(x)
        for h in hex.values() do s.push(h) end
        i = i + 4 // Add four more to the index for the hex digits.
      end
    else
      s.push(c)
      i = i + 1
    end
  end
  _tempString = consume s
fun _hex_digit(x: U8): U32 ? =>
  """
  Returns the value (0 to 15) of the ASCII hexadecimal digit `x`. Both
  'a'-'f' and 'A'-'F' are accepted. Errors on any other byte.
  """
  if (x >= '0') and (x <= '9') then
    (x - '0').u32()
  elseif (x >= 'a') and (x <= 'f') then
    // 'a' stands for ten, not zero.
    (x - 'a').u32() + 10
  elseif (x >= 'A') and (x <= 'F') then
    (x - 'A').u32() + 10
  else
    error
  end
fun ref _parse_number() ? =>
  """
  Parses a number: optional '-', integer digits, optional '.' fraction and
  optional 'e'/'E' exponent with optional sign.

  A number with neither fraction nor exponent becomes JsonTokenInt with the
  value in _tempInt; anything else becomes JsonTokenFloat with the value in
  _tempFloat. Errors if any part that must contain digits has none.

  The look-ahead after each part uses _peek(), which errors at the end of
  the input, so a number must be followed by at least one more byte.
  """
  // TODO: Should be possible to parse numbers lazily, using the indices.
  // That would be marginally more efficient.
  var negative = false
  if _peek() == '-' then
    negative = true
    _consume()
  end
  var frac: I32 = 0
  var frac_digits: U8 = 0
  var exp: I32 = 0
  var exp_digits: U8 = 0
  // Start with the integer part.
  (let int, _) = _parse_decimal()
  if _peek() == '.' then
    _consume()
    (frac, frac_digits) = _parse_decimal()
  end
  // OR-ing with 0x20 folds 'E' onto 'e'.
  if (_peek() or 0x20) == 'e' then
    // Exponential part
    _consume()
    var neg_exp = false
    match _peek()
    | '-' =>
      _consume()
      neg_exp = true
    | '+' => _consume()
    end
    (exp, exp_digits) = _parse_decimal()
    if neg_exp then
      exp = -exp
    end
  end
  if (frac_digits == 0) and (exp_digits == 0) then
    // Integer
    _token = JsonTokenInt
    _tempInt = if negative then -int else int end
  else
    // Otherwise we've got a floating point value. The fraction digits were
    // read as an integer, so scale them back down by their digit count.
    let fraction = frac.f64() / F64(10).pow(frac_digits.f64())
    let scale = F64(10).pow(exp.f64())
    let f = (int.f64() + fraction) * scale
    _tempFloat = if negative then -f else f end
    _token = JsonTokenFloat
  end
fun ref _parse_decimal(): (I32 /* value */, U8 /* digit count */) ? =>
  """
  Reads a run of ASCII digits and returns its value together with how many
  digits were read. Errors if there is not at least one digit, or if the
  input ends before a non-digit byte is seen.
  """
  var total: I32 = 0
  var seen: U8 = 0
  while true do
    let d = _peek()
    if (d < '0') or (d > '9') then
      break
    end
    _consume()
    total = (total * 10) + (d - '0').i32()
    seen = seen + 1
  end
  if seen == 0 then
    error
  end
  (total, seen)
fun get_bool(): Bool ? =>
  """
  Maps the current token to a Bool: JsonTokenTrue gives true and
  JsonTokenFalse gives false. Any other token is an error.
  """
  match _token
  | JsonTokenTrue => true
  | JsonTokenFalse => false
  else
    error
  end
fun get_int(): I32 ? =>
  """
  Reads the current number as an I32. A JsonTokenInt is returned as parsed;
  a JsonTokenFloat is converted with F64.i32(). Any other token is an error.
  """
  if _token is JsonTokenFloat then
    _tempFloat.i32()
  elseif _token is JsonTokenInt then
    _tempInt
  else
    error
  end
fun get_float(): F64 ? =>
  """
  Reads the current number as an F64. A JsonTokenFloat is returned as
  parsed; a JsonTokenInt is converted with I32.f64(). Any other token is an
  error.
  """
  if _token is JsonTokenFloat then
    _tempFloat
  elseif _token is JsonTokenInt then
    _tempInt.f64()
  else
    error
  end
fun ref get_string(): String val ? =>
  """
  Returns the decoded text of the current token.

  This works on JsonTokenFieldName and JsonTokenString tokens; like the
  other get_* accessors it errors on any other token. The string is decoded
  on first use and cached, so repeated calls do not decode it again.
  """
  match _token
  | JsonTokenString => None
  | JsonTokenFieldName => None
  else
    // Without this check a stale string from an earlier token would be
    // returned.
    error
  end
  if _tempString is None then
    _read_string()
  end
  // _read_string always stores a String, so this cast narrows the union to
  // the declared return type.
  _tempString as String val
fun is_null(): Bool =>
  """Tells whether the current token is JsonTokenNull."""
  match _token
  | JsonTokenNull => true
  else
    false
  end
fun eof(): Bool =>
  """
  Tells whether there is no current token (JsonTokenNothing), which is the
  case before the first next_token() call and after the outermost value has
  been completed.
  """
  match _token
  | JsonTokenNothing => true
  else
    false
  end
// Returns the most recent object key, i.e. the head of _keyStack.
// Will error if we're not under any keys (the stack is empty).
fun field(): String ? => _keyStack(0)
// Returns the current token without advancing the parser.
fun token(): JsonToken => _token
// Contract for types that JsonLoader can build from a JSON stream.
interface FromJSON
// Creates an instance that from_json() will then fill in.
new create()
// Populates this instance by reading tokens from `parser`; may error.
// NOTE(review): JsonLoader calls this right after the element's
// JsonTokenStartObject has been returned, and calls next_token() again as soon
// as it returns - presumably an implementation must read up to and including
// the matching JsonTokenEndObject. Confirm against an implementation.
fun ref from_json(parser: JsonParser) ?
primitive JsonLoader
  """
  Helpers that load an array of FromJSON values from a JSON file.
  """
  fun deserialize_file[A: FromJSON val](fp : FilePath) : Array[A] val =>
    """
    Reads a JSON file containing an array, and converts each element to an A.

    Never errors: if the file cannot be opened, or its contents cannot be
    parsed, an empty array is returned instead.
    """
    let res = try
      with f = OpenFile(fp) as File do
        let arr: Array[U8] = f.read(f.size())
        // The parser is built directly rather than inside a recover block:
        // `arr` is a ref local, which a recover expression may not capture,
        // and _deserialize_array only needs a ref parser anyway.
        _deserialize_array[A](JsonParser(arr))
      end
    end
    // `res` is None when anything inside the try block errored.
    match res
    | let a : Array[A] val => a
    else
      recover val Array[A] end
    end

  fun _deserialize_array[A: FromJSON val](p : JsonParser) : Array[A] val ? =>
    """
    Reads a JSON array of objects from `p`, building one A per object.

    Errors if the input does not start with an array, if an element fails to
    load, or if the array contains anything other than objects.
    """
    var out : Array[A] iso = recover Array[A]() end
    if p.next_token() isnt JsonTokenStartArray then error end
    var tok = p.next_token()
    while tok is JsonTokenStartObject do
      // NOTE(review): the capabilities here (an iso FromJSON pushed into an
      // Array[A], and a ref parser passed to a method on an iso receiver)
      // look inconsistent - confirm against the compiler.
      var a: FromJSON iso = A.create()
      a.from_json(p)
      out.push(consume a)
      tok = p.next_token()
    end
    // Only the closing bracket may end the element loop; anything else means
    // the array holds a non-object element, and returning what was collected
    // so far would silently drop data.
    if tok isnt JsonTokenEndArray then error end
    consume out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment