Last active
October 29, 2020 01:11
-
-
Save iliabylich/0b9d8ecfac1df86667025e219a2ed5a9 to your computer and use it in GitHub Desktop.
recursive json parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'strscan' | |
class Result | |
class Ok < self | |
def or(&block) | |
self | |
end | |
def and(&block) | |
chained = block.call(value) | |
case chained | |
when Ok | |
Ok.new(value + chained.value, length + chained.length, @buffer) | |
when Error | |
rollback | |
chained | |
end | |
end | |
def map(&block) | |
yield @value, @length | |
end | |
def map_value(value = nil, &block) | |
if block_given? | |
value = block.call(@value) | |
end | |
Ok.new(value, @length, @buffer) | |
end | |
def as_array | |
Ok.new([@value], @length, @buffer) | |
end | |
end | |
class Error < self | |
def or(&block) | |
rollback | |
block.call(self) | |
end | |
def and(&block) | |
self | |
end | |
def map(&block) | |
self | |
end | |
def map_value(value = nil, &block) | |
self | |
end | |
def as_array | |
self | |
end | |
end | |
def initialize(value, length, buffer) | |
@value = value | |
@length = length | |
@buffer = buffer | |
end | |
def rollback | |
@buffer.pos -= length | |
@value = :rollback | |
end | |
attr_reader :value, :length | |
end | |
class MonadicJsonParser | |
class Error < StandardError | |
end | |
def initialize(input) | |
@input = input | |
end | |
def tokens | |
@tokens = [] | |
@buffer = StringScanner.new(@input) | |
result = try_json | |
result.value | |
end | |
def try_json | |
try_element | |
end | |
def try_value | |
try_object | |
.or { try_array } | |
.or { try_string } | |
.or { try_number } | |
.or { try_raw('true').map_value(true) } | |
.or { try_raw('false').map_value(false) } | |
.or { try_raw('null').map_value(nil) } | |
end | |
def try_object | |
try_raw('{') | |
.and { try_members.or { try_ws } } | |
.and { try_raw('}') } | |
.map_value { |(_lcurly, *pairs, _rcurly)| pairs.reduce(&:merge) } | |
end | |
def try_members | |
try_member.as_array | |
.and { | |
( | |
try_raw(',') | |
.and { try_members } | |
.map_value { |(_comma, *pairs)| pairs } | |
).or { Ok([]) } | |
} | |
end | |
def try_member | |
try_ws | |
.and { try_string.as_array } | |
.and { try_ws } | |
.and { try_raw(':') } | |
.and { try_ws } | |
.and { try_element.as_array } | |
.map_value { |(_ws, string, _ws, _colon, _ws, element)| { string => element } } | |
end | |
def try_array | |
try_raw('[') | |
.and { try_elements.or { try_ws } } | |
.and { try_raw(']') } | |
.map_value { |(_lbrack, *elements, _rbrack)| elements } | |
end | |
def try_elements | |
try_element | |
.as_array | |
.and { | |
try_raw(',') | |
.and { try_elements } | |
.map_value { |(_comma, *elements)| elements } | |
.or { Ok([]) } | |
} | |
end | |
def try_element | |
try_ws | |
.and { try_value.as_array } | |
.and { try_ws } | |
.map_value { |(_wd, value, _wd)| value } | |
end | |
def try_string | |
try_raw('"') | |
.and { try_characters } | |
.and { try_raw('"') } | |
.map_value { |(_quote, *chars, _quote)| chars.join } | |
end | |
def try_characters | |
try_character | |
.and { try_characters } | |
.or { Ok(['']) } | |
end | |
def try_character | |
try_raw(/[^"\\]+/) | |
.or { | |
try_raw("\\") | |
.map_value('') | |
.and { try_escape } | |
.as_array | |
} | |
end | |
def try_escape | |
try_raw("\"").map_value('"') | |
.or { try_raw("\\").map_value("\\") } | |
.or { try_raw("/").map_value("/") } | |
.or { try_raw("b").map_value("\b") } | |
.or { try_raw("f").map_value("\f") } | |
.or { try_raw("n").map_value("\n") } | |
.or { try_raw("r").map_value("\r") } | |
.or { try_raw("t").map_value("\t") } | |
.or { | |
try_raw('u') | |
.map_value('') | |
.and { try_hex } | |
.and { try_hex } | |
.and { try_hex } | |
.and { try_hex } | |
.map_value { |hex| hex.to_i(16).chr(Encoding::UTF_8) } | |
} | |
end | |
def try_hex | |
try_raw(/[0-9A-Fa-f]/).map_value { |(value)| value } | |
end | |
def try_number | |
try_integer.as_array | |
.and { try_fraction.as_array } | |
.and { try_exponent.as_array } | |
.map_value { |(integer, fraction, exponent)| | |
if fraction || exponent | |
[*integer, *fraction, *exponent].join.to_f | |
else | |
[*integer, *fraction].join.to_i | |
end | |
} | |
end | |
def try_integer | |
try_raw('-').or { Ok(['']) } | |
.and { | |
(try_onenine.and { try_digits }) | |
.or { try_digit } | |
} | |
end | |
def try_digits | |
try_digit.and { try_digits.or { Ok([]) } } | |
end | |
def try_digit | |
try_raw('0').or { try_onenine } | |
end | |
def try_onenine | |
try_raw(/[1-9]/) | |
end | |
def try_fraction | |
(try_raw('.').and { try_digits }) | |
.or { Ok(nil) } | |
end | |
def try_exponent | |
try_raw(/[eE]/) | |
.and { try_sign } | |
.and { try_digits } | |
.or { Ok(nil) } | |
end | |
def try_sign | |
try_raw('-') | |
.or { try_raw('+') } | |
.or { Ok(['']) } | |
end | |
def try_ws | |
try_raw(/\s*/) | |
end | |
# .... | |
def Ok(value) | |
Result::Ok.new(value, 0, @buffer) | |
end | |
def try_raw(raw) | |
if @buffer.scan(raw) | |
Result::Ok.new([@buffer.matched], @buffer.matched.length, @buffer) | |
else | |
Result::Error.new( | |
[:unexpected_char, expected: raw, got: @buffer.string[@buffer.pos]], | |
0, | |
@buffer | |
) | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ws = " \n\r\t" | |
input = <<-JSON | |
{ | |
#{ws}"whitespaces"#{ws}:#{ws}" "#{ws}, | |
"null": null, | |
"true": true, | |
"false": false, | |
"numbers": [ | |
1, | |
-12, | |
1.2, | |
-1.2, | |
1e3, | |
-1e3, | |
1.2e3, | |
-1.2e3, | |
1E3, | |
-1E3, | |
1.2E3, | |
-1.2E3, | |
1e-3, | |
-1e-3, | |
1.2e-3, | |
-1.2e-3, | |
1E-3, | |
-1E-3, | |
1.2E-3, | |
-1.2E-3 | |
], | |
"strings": "\\" \\/ \\\\ \\b \\f \\n \\r \\t \\u0123 \\u4567 \\u890A \\uBCDE \\uffff some text", | |
"array": [#{ws}1#{ws},#{ws}2#{ws}], | |
"object": {#{ws}"key"#{ws}:#{ws}"value"#{ws}} | |
} | |
JSON | |
expected = { | |
"whitespaces"=>" ", | |
"null"=>nil, | |
"true"=>true, | |
"false"=>false, | |
"numbers"=>[ | |
1, | |
-12, | |
1.2, | |
-1.2, | |
1000.0, | |
-1000.0, | |
1200.0, | |
-1200.0, | |
1000.0, | |
-1000.0, | |
1200.0, | |
-1200.0, | |
0.001, | |
-0.001, | |
0.0012, | |
-0.0012, | |
0.001, | |
-0.001, | |
0.0012, | |
-0.0012 | |
], | |
"strings"=>"\" / \\ \b \f \n \r \t ģ 䕧 褊 볞 \uFFFF some text", | |
"array"=>[1, 2], | |
"object"=>{"key"=>"value"} | |
} | |
require 'json' | |
output = JSON.parse(input) | |
p output | |
p output == expected | |
require_relative './lexer' | |
output2 = MonadicJsonParser.new(input).tokens | |
p output2 | |
p output2 == expected |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment