-
-
Save arodland/0c98f2670e499fd27a6b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use 5.010; | |
use strict; | |
use warnings; | |
use Marpa::R2 2.102; | |
use Data::Dumper; | |
# array: 'A' count ':' element+ | |
# string: 'S' length ':' byte+ | |
# element: array | string | |
sub gen_counted_string { | |
my ($name, $data_lexeme, $count_lexeme, $method) = @_; | |
my $begin = $method ? "$name,$method" : $name; | |
return qq{ | |
:lexeme ~ <$count_lexeme> pause => after event => 'counted_string_begin[$begin]' | |
:lexeme ~ $data_lexeme pause => before event => 'counted_string_data[$name]' | |
<$data_lexeme> ~ [\\x00-\\xff] | |
}; | |
} | |
sub gen_counted { | |
my ($name, $element_rule, $count_lexeme, $method) = @_; | |
my $begin = $method ? "$name,$method" : $name; | |
return qq{ | |
event 'counted_item[$name]' = completed $element_rule | |
event 'counted_check[$name]' = predicted $element_rule | |
:lexeme ~ <$count_lexeme> pause => after event => 'counted_begin[$begin]' | |
<end of $name> ~ [^\\d\\D] | |
}; | |
} | |
my $grammar = Marpa::R2::Scanless::G->new({ | |
source => \qq{ | |
:default ::= action => [values] | |
lexeme default = latm => 1 | |
:start ::= document | |
document ::= value action => ::first | |
array ::= 'A' <array count> ':' elements <end of array> action => array | |
elements ::= element* | |
element ::= value action => ::first | |
<array count> ~ [\\d]+ | |
@{[ gen_counted('array', 'element', 'array count') ]} | |
hash ::= 'H' <hash count> ':' hash_elements <end of hash> action => hash | |
hash_elements ::= hash_element* | |
hash_element ::= string value action => hash_element | |
<hash count> ~ [\\d]+ | |
@{[ gen_counted('hash', 'hash_element', 'hash count') ]} | |
value ::= string action => ::first | |
value ::= array action => ::first | |
value ::= hash action => ::first | |
string ::= 'S' <string length> ':' bytes action => string | |
<string length> ~ [\\d]+ | |
@{[ gen_counted_string('string', 'bytes', 'string length') ]} | |
}, | |
}); | |
my $slr = Marpa::R2::Scanless::R->new({ | |
grammar => $grammar, | |
semantics_package => 'main', | |
# trace_terminals => 1, | |
}); | |
sub string { | |
return $_[4]; | |
} | |
sub array { | |
return $_[4]; | |
} | |
sub hash_element { | |
return [ $_[1], $_[2] ]; | |
} | |
sub hash { | |
my %ret; | |
for my $element (@{ $_[4] }) { | |
$ret{$element->[0]} = $element->[1]; | |
} | |
return \%ret; | |
} | |
sub decimal { | |
0 + $_[0]; | |
} | |
my $input = 'A2:H1:S5:helloS5:worldS1:!'; | |
# This one will cause a parse error, as it should | |
# (3 elements in a 2-element array) | |
# my $input = 'A2:S5:helloS5:worldS5:extra'; | |
my (%count_stack, %string_len); | |
INPUT: for( | |
my $pos = $slr->read( \$input ); | |
$pos < length($input); | |
$pos = $slr->resume($pos) | |
) { | |
EVENTS: { | |
my ($lexeme_start, $lexeme_length) = $slr->pause_span; | |
for my $event (@{ $slr->events }) { | |
my ($name) = @{$event}; | |
# string | |
if ($name =~ /counted_string_begin\[(.*?)\]/) { | |
my ($type, $method) = split /,/, $1; | |
$method = 'decimal' unless defined $method; | |
my $text = $slr->literal($lexeme_start, $lexeme_length); | |
my $count = do { # TODO: something more sensible | |
no strict 'refs'; $method->($text); | |
}; | |
$string_len{$type} = $count; | |
warn "Ready for a $type string of length $count\n"; | |
} | |
elsif ($name =~ /counted_string_data\[(.*?)\]/) { | |
my $type = $1; | |
my $count = $string_len{$type}; | |
warn "Reading $count bytes at $lexeme_start\n"; | |
$slr->lexeme_read('bytes', $lexeme_start, $count); | |
$pos = $slr->pos(); | |
redo EVENTS; # Reading bytes triggers "completed element" | |
} | |
elsif ($name =~ /counted_begin\[(.*?)\]/) { | |
my ($type, $method) = split /,/, $1; | |
$method = 'decimal' unless defined $method; | |
my $text = $slr->literal($lexeme_start, $lexeme_length); | |
my $count = do { # TODO: something more sensible | |
no strict 'refs'; $method->($text); | |
}; | |
push @{ $count_stack{$type} }, $count; | |
warn "Beginning $type of $count items\n"; | |
} | |
elsif ($name =~ /counted_item\[(.*?)\]/) { | |
my $type = $1; | |
$count_stack{$type}[-1] --; | |
warn "Got a $type item, $count_stack{$type}[-1] left\n"; | |
} | |
elsif ($name =~ /counted_check\[(.*?)\]/) { | |
my $type = $1; | |
if ($count_stack{$type}[-1] == 0) { | |
warn "Completed $type\n"; | |
pop @{ $count_stack{$type} }; | |
$slr->lexeme_read("end of $type", $slr->pos, 0); | |
redo EVENTS; # Finishing the array might have caused another "completed element" | |
} | |
} | |
} # for my $event | |
} | |
} | |
my $val = $slr->value; | |
if ($$val) { | |
print Dumper($$val); | |
} else { | |
print "Error"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment