-
-
Save pstuifzand/4447349 to your computer and use it in GitHub Desktop.
package MarpaX::JSON; | |
use strict; | |
use Marpa::R2 2.039_000; | |
sub new { | |
my ($class) = @_; | |
my $self = bless {}, $class; | |
$self->{grammar} = Marpa::R2::Scanless::G->new( | |
{ | |
action_object => 'MarpaX::JSON::Actions', | |
default_action => 'do_first_arg', | |
source => \(<<'END_OF_SOURCE'), | |
:start ::= json | |
json ::= object | |
| array | |
object ::= '{' '}' action => do_empty_object | |
| '{' members '}' action => do_object | |
members ::= pair+ separator => <comma> action => do_list | |
pair ::= string ':' value action => do_pair | |
value ::= string | |
| object | |
| number | |
| array | |
| 'true' action => do_true | |
| 'false' action => do_true | |
| 'null' action => do_null | |
array ::= '[' ']' action => do_empty_array | |
| '[' elements ']' action => do_array | |
elements ::= value+ separator => <comma> action => do_list | |
number ~ int | |
| int frac | |
| int exp | |
| int frac exp | |
int ~ digits | |
| '-' digits | |
digits ~ [\d]+ | |
frac ~ '.' digits | |
exp ~ e digits | |
e ~ 'e' | |
| 'e+' | |
| 'e-' | |
| 'E' | |
| 'E+' | |
| 'E-' | |
string ::= lstring action => do_string | |
lstring ~ quote in_string quote | |
quote ~ ["] | |
in_string ~ in_string_char* | |
in_string_char ~ [^"\\] | |
| '\' '"' | |
| '\' 'b' | |
| '\' 'f' | |
| '\' 't' | |
| '\' 'n' | |
| '\' 'r' | |
| '\' 'u' four_hex_digits | |
| '\' '/' | |
| '\\' | |
four_hex_digits ~ hex_digit hex_digit hex_digit hex_digit | |
hex_digit ~ [0-9a-fA-F] | |
comma ~ ',' | |
:discard ~ whitespace | |
whitespace ~ [\s]+ | |
END_OF_SOURCE | |
} | |
); | |
return $self; | |
} | |
sub parse { | |
my ($self, $string) = @_; | |
my $re = Marpa::R2::Scanless::R->new( { grammar => $self->{grammar} } ); | |
$re->read(\$string); | |
my $value_ref = $re->value(); | |
return ${$value_ref}; | |
} | |
sub parse_json { | |
my ($string) = @_; | |
my $parser = MarpaX::JSON->new(); | |
return $parser->parse($string); | |
} | |
package MarpaX::JSON::Actions; | |
use strict; | |
sub new { | |
my ($class) = @_; | |
return bless {}, $class; | |
} | |
sub do_first_arg { | |
shift; | |
return $_[0]; | |
} | |
sub do_empty_object { | |
return {}; | |
} | |
sub do_object { | |
shift; | |
return { map { @$_ } @{$_[1]} }; | |
} | |
sub do_empty_array { | |
return []; | |
} | |
sub do_array { | |
shift; | |
return $_[1]; | |
} | |
sub do_list { | |
shift; | |
return \@_; | |
} | |
sub do_pair { | |
shift; | |
return [ $_[0], $_[2] ]; | |
} | |
sub do_string { | |
shift; | |
my $s = $_[0]; | |
$s =~ s/^"//; | |
$s =~ s/"$//; | |
$s =~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/eg; | |
$s =~ s/\\n/\n/g; | |
$s =~ s/\\r/\r/g; | |
$s =~ s/\\b/\b/g; | |
$s =~ s/\\f/\f/g; | |
$s =~ s/\\t/\t/g; | |
$s =~ s/\\\\/\\/g; | |
$s =~ s{\\/}{/}g; | |
$s =~ s{\\"}{"}g; | |
return $s; | |
} | |
sub do_true { | |
shift; | |
return $_[0] eq 'true'; | |
} | |
sub do_null { | |
return undef; | |
} | |
sub do_join { | |
shift; | |
return join '', @_; | |
} | |
1; |
use Test::More; | |
use Test::Exception; | |
use lib 'lib'; | |
use MarpaX::JSON; | |
my $data = MarpaX::JSON::parse_json(q${"test":"1"}$); | |
is($data->{test}, 1); | |
$data = MarpaX::JSON::parse_json(q${"test":[1,2,3]}$); | |
is_deeply($data->{test}, [1,2,3]); | |
$data = MarpaX::JSON::parse_json(q${"test":true}$); | |
is($data->{test}, 1); | |
$data = MarpaX::JSON::parse_json(q${"test":false}$); | |
is($data->{test}, ''); | |
$data = MarpaX::JSON::parse_json(q${"test":null}$); | |
is($data->{test}, undef); | |
$data = MarpaX::JSON::parse_json(q${"test":null, "test2":"hello world"}$); | |
is($data->{test}, undef); | |
is($data->{test2}, "hello world"); | |
$data = MarpaX::JSON::parse_json(q${"test":"1.25"}$); | |
is($data->{test}, '1.25'); | |
$data = MarpaX::JSON::parse_json(q${"test":"1.25e4"}$); | |
is($data->{test}, '1.25e4'); | |
$data = MarpaX::JSON::parse_json(q$[]$); | |
is_deeply($data, []); | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
[ | |
{ | |
"precision": "zip", | |
"Latitude": 37.7668, | |
"Longitude": -122.3959, | |
"Address": "", | |
"City": "SAN FRANCISCO", | |
"State": "CA", | |
"Zip": "94107", | |
"Country": "US" | |
}, | |
{ | |
"precision": "zip", | |
"Latitude": 37.371991, | |
"Longitude": -122.026020, | |
"Address": "", | |
"City": "SUNNYVALE", | |
"State": "CA", | |
"Zip": "94085", | |
"Country": "US" | |
} | |
] | |
JSON | |
is_deeply($data, [ | |
{ "precision"=>"zip", Latitude => "37.7668", Longitude=>"-122.3959", | |
"Country" => "US", Zip => 94107, Address => '', | |
City => "SAN FRANCISCO", State => 'CA' }, | |
{ "precision" => "zip", Longitude => "-122.026020", Address => "", | |
City => "SUNNYVALE", Country => "US", Latitude => "37.371991", | |
Zip => 94085, State => "CA" } | |
]); | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ | |
"Image": { | |
"Width": 800, | |
"Height": 600, | |
"Title": "View from 15th Floor", | |
"Thumbnail": { | |
"Url": "http://www.example.com/image/481989943", | |
"Height": 125, | |
"Width": "100" | |
}, | |
"IDs": [116, 943, 234, 38793] | |
} | |
} | |
JSON | |
is_deeply($data, { | |
"Image" => { | |
"Width" => 800, "Height" => 600, | |
"Title" => "View from 15th Floor", | |
"Thumbnail" => { | |
"Url" => "http://www.example.com/image/481989943", | |
"Height" => 125, | |
"Width" => 100, | |
}, | |
"IDs" => [ 116, 943, 234, 38793 ], | |
} | |
}); | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ | |
"source" : "<a href=\"http://janetter.net/\" rel=\"nofollow\">Janetter</a>", | |
"entities" : { | |
"user_mentions" : [ { | |
"name" : "James Governor", | |
"screen_name" : "moankchips", | |
"indices" : [ 0, 10 ], | |
"id_str" : "61233", | |
"id" : 61233 | |
} ], | |
"media" : [ ], | |
"hashtags" : [ ], | |
"urls" : [ ] | |
}, | |
"in_reply_to_status_id_str" : "281400879465238529", | |
"geo" : { | |
}, | |
"id_str" : "281405942321532929", | |
"in_reply_to_user_id" : 61233, | |
"text" : "@monkchips Ouch. Some regrets are harsher than others.", | |
"id" : 281405942321532929, | |
"in_reply_to_status_id" : 281400879465238529, | |
"created_at" : "Wed Dec 19 14:29:39 +0000 2012", | |
"in_reply_to_screen_name" : "monkchips", | |
"in_reply_to_user_id_str" : "61233", | |
"user" : { | |
"name" : "Sarah Bourne", | |
"screen_name" : "sarahebourne", | |
"protected" : false, | |
"id_str" : "16010789", | |
"profile_image_url_https" : "https://si0.twimg.com/profile_images/638441870/Snapshot-of-sb_normal.jpg", | |
"id" : 16010789, | |
"verified" : false | |
} | |
} | |
JSON | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ "test": "\u2603" } | |
JSON | |
is($data->{test}, "\x{2603}"); | |
dies_ok { | |
$data = MarpaX::JSON::parse_json(<<'JSON'); | |
{ "test": "éáóüöï" } | |
JSON | |
}, 'marpa scanless doesn\'t understand higher than 8-bit codepoints yet'; | |
done_testing(); |
There are probably some more problems with it at the moment. This is just an half hour experiment that worked out really great.
I only mentioned it because as cool as this is, I don't doubt it's already on its way to be included in some production code. You totally rock.
"number" should be a G0 rule:
-number ::= int
- | int frac action => do_join
- | int exp action => do_join
- | int frac exp action => do_join
+number ~ int
+ | int frac
+ | int exp
+ | int frac exp
Also, since it needs 2.039_000 to work, it is probably best to change
use Marpa::R2;
to
use Marpa::R2 2.039_000;
A structural rule with a join is a bit of a red flag. I'll change it.
@uberbaud Thanks. And I guess you're totally right about being used in production.
~ 20-30x slower than JSON::PP for simple JSON strings. Not bad for a half-hour work :)
BTW, the parser doesn't seem to report errors (it just returns undef) for cases like: "[", "[[", "{", q/"a/.
I've done some work on this, which is in my fork of this gist. (I'd create a pull request, but I cannot figure out how to do that for a gist -- in any case, you'll want to edit my work.)
It's now 10 times as fast. Still not quite as fast as JSON::PP, but a lot closer. And it's now an even simpler solution. One big change is to do all parse-time logic in C, creating an AST, and then create the data structure by fixing up the AST.
Very cool, only it should be
At least according to the RFC: "A JSON text is a serialized object or array."