arodland · August 29, 2015 14:15
diff --git a/length-prefixed.pl b/length-prefixed.pl
 #!/usr/bin/perl
 use 5.010;
 use strict;
 use warnings;

 use Marpa::R2 2.102;
 use Data::Dumper;

 # array: 'A' count ':' element+
 # string: 'S' length ':' byte+
 # element: array | string

 sub gen_counted_string {
  my ($name, $data_lexeme, $count_lexeme, $method) = @_;
  my $begin = $method ? "$name,$method" : $name;
  return qq{
    :lexeme ~ <$count_lexeme> pause => after event => 'counted_string_begin[$begin]'
    :lexeme ~ $data_lexeme pause => before event => 'counted_string_data[$name]'
    <$data_lexeme> ~ [\\x00-\\xff]
  };
 }

 sub gen_counted {
  my ($name, $element_rule, $count_lexeme, $method) = @_;
  my $begin = $method ? "$name,$method" : $name;
  return qq{
    event 'counted_item[$name]' = completed $element_rule
    event 'counted_check[$name]' = predicted $element_rule
    :lexeme ~ <$count_lexeme> pause => after event => 'counted_begin[$begin]'
    <end of $name> ~ [^\\d\\D]
  };
 }

 my $grammar = Marpa::R2::Scanless::G->new({
    source => \qq{
      :default ::= action => [values]
      lexeme default = latm => 1
      :start ::= document
      document ::= value action => ::first

      array ::= 'A' <array count> ':' elements <end of array> action => array

      elements ::= element*
      element ::= value action => ::first
      <array count> ~ [\\d]+

      @{[ gen_counted('array', 'element', 'array count') ]}

      hash ::= 'H' <hash count> ':' hash_elements <end of hash> action => hash

      hash_elements ::= hash_element*
      hash_element ::= string value action => hash_element
      <hash count> ~ [\\d]+

      @{[ gen_counted('hash', 'hash_element', 'hash count') ]}

      value ::= string action => ::first
      value ::= array action => ::first
      value ::= hash action => ::first

      string ::= 'S' <string length> ':' bytes action => string

      <string length> ~ [\\d]+

      @{[ gen_counted_string('string', 'bytes', 'string length') ]}
    },
 });

 my $slr = Marpa::R2::Scanless::R->new({
    grammar => $grammar,
    semantics_package => 'main',
 #    trace_terminals => 1,
 });

 sub string {
  return $_[4];
 }

 sub array {
  return $_[4];
 }

 sub hash_element {
  return [ $_[1], $_[2] ];
 }

 sub hash {
  my %ret;
  for my $element (@{ $_[4] }) {
    $ret{$element->[0]} = $element->[1];
  }
  return \%ret;
 }

 sub decimal {
  0 + $_[0];
 }

 my $input = 'A2:H1:S5:helloS5:worldS1:!';
 # This one will cause a parse error, as it should
 # (3 elements in a 2-element array)
 # my $input = 'A2:S5:helloS5:worldS5:extra';

 my (%count_stack, %string_len);

 INPUT: for(
  my $pos = $slr->read( \$input );
  $pos < length($input);
  $pos = $slr->resume($pos)
 ) {
  EVENTS: {
    my ($lexeme_start, $lexeme_length) = $slr->pause_span;
    for my $event (@{ $slr->events }) {
        my ($name) = @{$event};
        # string
        if ($name =~ /counted_string_begin\[(.*?)\]/) {
          my ($type, $method) = split /,/, $1;
          $method = 'decimal' unless defined $method;
          my $text = $slr->literal($lexeme_start, $lexeme_length);
          my $count = do { # TODO: something more sensible
            no strict 'refs'; $method->($text);
          };
          $string_len{$type} = $count;
          warn "Ready for a $type string of length $count\n";
        }
        elsif ($name =~ /counted_string_data\[(.*?)\]/) {
          my $type = $1;
          my $count = $string_len{$type};
          warn "Reading $count bytes at $lexeme_start\n";
          $slr->lexeme_read('bytes', $lexeme_start, $count);
          $pos = $slr->pos();
          redo EVENTS; # Reading bytes triggers "completed element"
        }
        elsif ($name =~ /counted_begin\[(.*?)\]/) {
          my ($type, $method) = split /,/, $1;
          $method = 'decimal' unless defined $method;
          my $text = $slr->literal($lexeme_start, $lexeme_length);
          my $count = do { # TODO: something more sensible
            no strict 'refs'; $method->($text);
          };
          push @{ $count_stack{$type} }, $count;
          warn "Beginning $type of $count items\n";
        }
        elsif ($name =~ /counted_item\[(.*?)\]/) {
          my $type = $1;
          $count_stack{$type}[-1] --;
          warn "Got a $type item, $count_stack{$type}[-1] left\n";
        }
        elsif ($name =~ /counted_check\[(.*?)\]/) {
          my $type = $1;
          if ($count_stack{$type}[-1] == 0) {
            warn "Completed $type\n";
            pop @{ $count_stack{$type} };
            $slr->lexeme_read("end of $type", $slr->pos, 0);
            redo EVENTS; # Finishing the array might have caused another "completed element"
          }
        }
    } # for my $event
  }
 }

 my $val = $slr->value;
 if ($$val) {
  print Dumper($$val);
 } else {
  print "Error";
 }
	#!/usr/bin/perl
	use 5.010;
	use strict;
	use warnings;

	use Marpa::R2 2.102;
	use Data::Dumper;

	# array: 'A' count ':' element+
	# string: 'S' length ':' byte+
	# element: array \| string

	sub gen_counted_string {
	my ($name, $data_lexeme, $count_lexeme, $method) = @_;
	my $begin = $method ? "$name,$method" : $name;
	return qq{
	:lexeme ~ <$count_lexeme> pause => after event => 'counted_string_begin[$begin]'
	:lexeme ~ $data_lexeme pause => before event => 'counted_string_data[$name]'
	<$data_lexeme> ~ [\\x00-\\xff]
	};
	}

	sub gen_counted {
	my ($name, $element_rule, $count_lexeme, $method) = @_;
	my $begin = $method ? "$name,$method" : $name;
	return qq{
	event 'counted_item[$name]' = completed $element_rule
	event 'counted_check[$name]' = predicted $element_rule
	:lexeme ~ <$count_lexeme> pause => after event => 'counted_begin[$begin]'
	<end of $name> ~ [^\\d\\D]
	};
	}

	my $grammar = Marpa::R2::Scanless::G->new({
	source => \qq{
	:default ::= action => [values]
	lexeme default = latm => 1
	:start ::= document
	document ::= value action => ::first

	array ::= 'A' <array count> ':' elements <end of array> action => array

	elements ::= element*
	element ::= value action => ::first
	<array count> ~ [\\d]+

	@{[ gen_counted('array', 'element', 'array count') ]}

	hash ::= 'H' <hash count> ':' hash_elements <end of hash> action => hash

	hash_elements ::= hash_element*
	hash_element ::= string value action => hash_element
	<hash count> ~ [\\d]+

	@{[ gen_counted('hash', 'hash_element', 'hash count') ]}

	value ::= string action => ::first
	value ::= array action => ::first
	value ::= hash action => ::first

	string ::= 'S' <string length> ':' bytes action => string

	<string length> ~ [\\d]+

	@{[ gen_counted_string('string', 'bytes', 'string length') ]}
	},
	});

	my $slr = Marpa::R2::Scanless::R->new({
	grammar => $grammar,
	semantics_package => 'main',
	# trace_terminals => 1,
	});

	sub string {
	return $_[4];
	}

	sub array {
	return $_[4];
	}

	sub hash_element {
	return [ $_[1], $_[2] ];
	}

	sub hash {
	my %ret;
	for my $element (@{ $_[4] }) {
	$ret{$element->[0]} = $element->[1];
	}
	return \%ret;
	}

	sub decimal {
	0 + $_[0];
	}

	my $input = 'A2:H1:S5:helloS5:worldS1:!';
	# This one will cause a parse error, as it should
	# (3 elements in a 2-element array)
	# my $input = 'A2:S5:helloS5:worldS5:extra';

	my (%count_stack, %string_len);

	INPUT: for(
	my $pos = $slr->read( \$input );
	$pos < length($input);
	$pos = $slr->resume($pos)
	) {
	EVENTS: {
	my ($lexeme_start, $lexeme_length) = $slr->pause_span;
	for my $event (@{ $slr->events }) {
	my ($name) = @{$event};
	# string
	if ($name =~ /counted_string_begin\[(.*?)\]/) {
	my ($type, $method) = split /,/, $1;
	$method = 'decimal' unless defined $method;
	my $text = $slr->literal($lexeme_start, $lexeme_length);
	my $count = do { # TODO: something more sensible
	no strict 'refs'; $method->($text);
	};
	$string_len{$type} = $count;
	warn "Ready for a $type string of length $count\n";
	}
	elsif ($name =~ /counted_string_data\[(.*?)\]/) {
	my $type = $1;
	my $count = $string_len{$type};
	warn "Reading $count bytes at $lexeme_start\n";
	$slr->lexeme_read('bytes', $lexeme_start, $count);
	$pos = $slr->pos();
	redo EVENTS; # Reading bytes triggers "completed element"
	}
	elsif ($name =~ /counted_begin\[(.*?)\]/) {
	my ($type, $method) = split /,/, $1;
	$method = 'decimal' unless defined $method;
	my $text = $slr->literal($lexeme_start, $lexeme_length);
	my $count = do { # TODO: something more sensible
	no strict 'refs'; $method->($text);
	};
	push @{ $count_stack{$type} }, $count;
	warn "Beginning $type of $count items\n";
	}
	elsif ($name =~ /counted_item\[(.*?)\]/) {
	my $type = $1;
	$count_stack{$type}[-1] --;
	warn "Got a $type item, $count_stack{$type}[-1] left\n";
	}
	elsif ($name =~ /counted_check\[(.*?)\]/) {
	my $type = $1;
	if ($count_stack{$type}[-1] == 0) {
	warn "Completed $type\n";
	pop @{ $count_stack{$type} };
	$slr->lexeme_read("end of $type", $slr->pos, 0);
	redo EVENTS; # Finishing the array might have caused another "completed element"
	}
	}
	} # for my $event
	}
	}

	my $val = $slr->value;
	if ($$val) {
	print Dumper($$val);
	} else {
	print "Error";
	}