rns · September 18, 2014 04:20
diff --git a/timeflies_q25903112.out b/timeflies_q25903112.out
  0-25 R : Time flies like an arrow.      <- S ::= NP VP period
  0- 4 R : Time                           <- NP ::= NN
 # token
  0- 4 T : Time                           <- NN
 # glade   0- 4: Time ---
 - (NP (NN Time))

  5-24 R : flies like an arrow            <- VP ::= VBZ PP
 # token
  5-10 T : flies                          <- VBZ
 11-24 R : like an arrow                  <- PP ::= IN NP
 # token
 11-15 T : like                           <- IN
 16-24 R : an arrow                       <- NP ::= DT NN
 # token
 16-18 T : an                             <- DT
 # token
 19-24 T : arrow                          <- NN
 # glade  16-24: an arrow ---
 - (NP (DT an) (NN arrow))

 # glade  11-24: like an arrow ---
 - (PP (IN like) (NP (DT an) (NN arrow)))

 # glade   5-24: flies like an arrow ---
 - (VP (VBZ flies) (PP (IN like) (NP (DT an) (NN arrow))))

 # token
 24-25 T : .                              <- period
  0-10 R : Time flies                     <- NP ::= NN NNS
 # token
  0- 4 T : Time                           <- NN
 # token
  5-10 T : flies                          <- NNS
 # glade   0-10: Time flies ---
 - (NP (NN Time) (NNS flies))

 11-24 R : like an arrow                  <- VP ::= VBP NP
 # token
 11-15 T : like                           <- VBP
 # glade  11-24: like an arrow ---
 - (VP (VBP like) (NP (DT an) (NN arrow)))

 # glade   0-25: Time flies like an arrow. ---
 - |-
  (S (NP (NN Time))
     (VP (VBZ flies) (PP (IN like) (NP (DT an) (NN arrow))))
     (period .))
 - |-
  (S (NP (NN Time) (NNS flies))
     (VP (VBP like) (NP (DT an) (NN arrow)))
     (period .))

  0-26 R : Fruit flies like a banana.     <- S ::= NP VP period
  0- 5 R : Fruit                          <- NP ::= NN
 # token
  0- 5 T : Fruit                          <- NN
 # glade   0- 5: Fruit ---
 - (NP (NN Fruit))

  6-25 R : flies like a banana            <- VP ::= VBZ PP
 # token
  6-11 T : flies                          <- VBZ
 12-25 R : like a banana                  <- PP ::= IN NP
 # token
 12-16 T : like                           <- IN
 17-25 R : a banana                       <- NP ::= DT NN
 # token
 17-18 T : a                              <- DT
 # token
 19-25 T : banana                         <- NN
 # glade  17-25: a banana ---
 - (NP (DT a) (NN banana))

 # glade  12-25: like a banana ---
 - (PP (IN like) (NP (DT a) (NN banana)))

 # glade   6-25: flies like a banana ---
 - (VP (VBZ flies) (PP (IN like) (NP (DT a) (NN banana))))

 # token
 25-26 T : .                              <- period
  0-11 R : Fruit flies                    <- NP ::= NN NNS
 # token
  0- 5 T : Fruit                          <- NN
 # token
  6-11 T : flies                          <- NNS
 # glade   0-11: Fruit flies ---
 - (NP (NN Fruit) (NNS flies))

 12-25 R : like a banana                  <- VP ::= VBP NP
 # token
 12-16 T : like                           <- VBP
 # glade  12-25: like a banana ---
 - (VP (VBP like) (NP (DT a) (NN banana)))

 # glade   0-26: Fruit flies like a banana. ---
 - |-
  (S (NP (NN Fruit))
     (VP (VBZ flies) (PP (IN like) (NP (DT a) (NN banana))))
     (period .))
 - |-
  (S (NP (NN Fruit) (NNS flies))
     (VP (VBP like) (NP (DT a) (NN banana)))
     (period .))
diff --git a/timeflies_q25903112.pl b/timeflies_q25903112.pl
 #!/usr/bin/perl
 # Copyright 2013 Jeffrey Kegler
 # This file is part of Marpa::R2.  Marpa::R2 is free software: you can
 # redistribute it and/or modify it under the terms of the GNU Lesser
 # General Public License as published by the Free Software Foundation,
 # either version 3 of the License, or (at your option) any later version.
 #
 # Marpa::R2 is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 # Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser
 # General Public License along with Marpa::R2.  If not, see
 # http://www.gnu.org/licenses/.

 # This example parses ambiguous English sentences.  The target annotation
 # is Penn Treebank's syntactic bracketing tags.  For details, see
 # http://www.cis.upenn.edu/~treebank/

 # This example originally came from Ralf Muschall.  Ruslan Shvedov
 # reworked my implementation, converting it to the SLIF and
 # Penn Treebank.  Ruslan and Ralf clearly know English grammar better than
 # most of us native speakers.

 # 'time', 'fruit', and 'flies' can be nouns or verbs, 'like' can be
 # a preposition or a verb.  This creates syntactic ambiguity shown 
 # in the parse results.

 # Modifier nouns are not tagged or lexed as adjectives (JJ), because
 # "Nouns that are used as modifiers, whether in isolation or in sequences,
 # should be tagged as nouns (NN, NNS) rather than as adjectives (JJ)."
 # -- ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz

 # The saying "time flies like an arrow; fruit flies like a banana"
 # is attributed to Groucho Marx, but there is no reason to believe
 # he ever said it.  Apparently, the saying
 # first appeared on the Usenet on net.jokes in 1982.
 # I've documented this whole thing on Wikipedia:
 # http://en.wikipedia.org/wiki/Time_flies_like_an_arrow
 #
 # The permalink is:
 # http://en.wikipedia.org/w/index.php?title=Time_flies_like_an_arrow&oldid=311163283

 use 5.010;
 use strict;
 use warnings;

 use Marpa::R2;

 use YAML;

 my $grammar = Marpa::R2::Scanless::G->new(
    {   
        source => \(<<'END_OF_SOURCE'),
        
 :default ::= action => [ name, values ]
 lexeme default = action => [ name, value ]

 S   ::= NP  VP  period

 NP  ::= NN            
    |   JJ  NN        
    |   DT  NN        
    |   NN  NNS       

 VP  ::= VBP NP        
    |   VBP PP        
    |   VBZ PP        
    |   VBZ RB        

 PP  ::= IN  NP        
    
 period ~ '.'

 :discard ~ whitespace
 whitespace ~ [\s]+

 DT  ~ 'a' | 'an'
 NN  ~ 'arrow' | 'banana'
 NNS ~ 'flies'
 NNS ~ 'bananas'
 VBZ ~ 'flies' 
 NN  ~ 'fruit':i
 VBP ~ 'fruit':i
 IN  ~ 'like'
 VBP ~ 'like'
 NN  ~ 'time':i
 VBP ~ 'time':i
 RB  ~ 'fast'
 VBP ~ 'fast'
 JJ  ~ 'fast'
 NN  ~ 'fast'
 VBP ~ 'spoil'

 END_OF_SOURCE
    }
 );

 my $expected = <<'EOS';
 [To be written]
 EOS

 my $paragraph = <<END_OF_PARAGRAPH;
 Time flies like an arrow.
 Time flies fast.
 Fruit flies like a banana.
 Fruit flies spoil banana.
 END_OF_PARAGRAPH

 my $recce;

 for my $sentence (split /\n/, $paragraph){

    $recce = Marpa::R2::Scanless::R->new( { grammar => $grammar  } );

    $recce->read( \$sentence );

    if ( $recce->ambiguity_metric() > 1 ) {
        
        my $asf = Marpa::R2::ASF->new( { slr => $recce } );
        die 'No ASF' if not defined $asf;
        $asf->traverse( {}, \&traverser );
        
    }
 }

 sub all_choices{
    my $glade = shift;
    # The results at each position are a list of choices, so
    # to produce a new result list, we need to take a Cartesian
    # product of all the choices
    my $length = $glade->rh_length();
    my @results = ( [] );
    for my $rh_ix ( 0 .. $length - 1 ) {
        my @new_results = ();
        for my $old_result (@results) {
            my $child_value = $glade->rh_value($rh_ix);
            for my $new_value ( @{ $child_value } ) {
                push @new_results, [ @{$old_result}, $new_value ];
            }
        }
        @results = @new_results;
    } ## end for my $rh_ix ( 0 .. $length - 1 )
    return @results;
 }

 sub traverser {

    my ($glade, $scratch)     = @_;
    
    # get rule and symbol IDs
    my $rule_id     = $glade->rule_id();

    my $symbol_id   = $glade->symbol_id();
    my $symbol_name = $grammar->symbol_name($symbol_id);
    my $display_form  = $grammar->symbol_display_form($symbol_id);

    # get span
    my ( $start, $length ) = $glade->span();
    my $literal = $glade->literal();

    # set glade's prefix
    my $prefix = sprintf "%3s-%2s", $start, $start + $length;

    # check for tokens
    if (not defined $rule_id){
        say "# token\n", $prefix, " T : ", sprintf("%-30s", $literal), " <- $display_form";
        return ["($symbol_name $literal)"];
    }
    
    # get rule expansion
    unless ( $symbol_name eq '[:start]' ){
        my ($lhs_id, @rhs_ids) = 
            map { $grammar->symbol_display_form($_) } 
                $grammar->rule_expand($rule_id);
        my $rule = "$lhs_id ::= " . join q{ }, @rhs_ids;

        # rule
        say $prefix, " R : ", sprintf("%-30s", $literal), " <- $rule";
    }
    
    my @return_value;
    
    CHOICE: while (1) {

        # The results at each position are a list of choices, so
        # to produce a new result list, we need to take a Cartesian
        # product of all the choices
        my @results = all_choices( $glade );

        # Special case for the start rule
        if ( $symbol_name eq '[:start]' ) {
            my $retval = [ map { join q{}, @{$_} } @results ];
 #            say Dump $retval;
            return $retval;
        }

        # Now we have a list of choices, as a list of lists.  Each sub list
        # is a list of tokens, which we need to join into
        # a single token.  The result will be to collapse
        # one level of lists, and leave us with a list of Penn-tagged
        # elements
        my $join_ws = q{ };
        $join_ws = qq{\n   } if $symbol_name eq 'S';
        my $push_val = [ map { '(' . $symbol_name . q{ } . ( join $join_ws, @{$_} ) . ')' }
                           @results ];
 #        say "# choice$prefix ", Dump $push_val;
        push @return_value, @$push_val;

        # Look at the next alternative in this glade, or end the
        # loop if there is none
        last CHOICE if not defined $glade->next();

    } ## end CHOICE: while (1)

    # Return the list of tokens for this glade
    say "# glade $prefix: $literal ", Dump \@return_value;
    return \@return_value;

 } ## end sub traverser
	0-25 R : Time flies like an arrow. <- S ::= NP VP period
	0- 4 R : Time <- NP ::= NN
	# token
	0- 4 T : Time <- NN
	# glade 0- 4: Time ---
	- (NP (NN Time))

	5-24 R : flies like an arrow <- VP ::= VBZ PP
	# token
	5-10 T : flies <- VBZ
	11-24 R : like an arrow <- PP ::= IN NP
	# token
	11-15 T : like <- IN
	16-24 R : an arrow <- NP ::= DT NN
	# token
	16-18 T : an <- DT
	# token
	19-24 T : arrow <- NN
	# glade 16-24: an arrow ---
	- (NP (DT an) (NN arrow))

	# glade 11-24: like an arrow ---
	- (PP (IN like) (NP (DT an) (NN arrow)))

	# glade 5-24: flies like an arrow ---
	- (VP (VBZ flies) (PP (IN like) (NP (DT an) (NN arrow))))

	# token
	24-25 T : . <- period
	0-10 R : Time flies <- NP ::= NN NNS
	# token
	0- 4 T : Time <- NN
	# token
	5-10 T : flies <- NNS
	# glade 0-10: Time flies ---
	- (NP (NN Time) (NNS flies))

	11-24 R : like an arrow <- VP ::= VBP NP
	# token
	11-15 T : like <- VBP
	# glade 11-24: like an arrow ---
	- (VP (VBP like) (NP (DT an) (NN arrow)))

	# glade 0-25: Time flies like an arrow. ---
	- \|-
	(S (NP (NN Time))
	(VP (VBZ flies) (PP (IN like) (NP (DT an) (NN arrow))))
	(period .))
	- \|-
	(S (NP (NN Time) (NNS flies))
	(VP (VBP like) (NP (DT an) (NN arrow)))
	(period .))

	0-26 R : Fruit flies like a banana. <- S ::= NP VP period
	0- 5 R : Fruit <- NP ::= NN
	# token
	0- 5 T : Fruit <- NN
	# glade 0- 5: Fruit ---
	- (NP (NN Fruit))

	6-25 R : flies like a banana <- VP ::= VBZ PP
	# token
	6-11 T : flies <- VBZ
	12-25 R : like a banana <- PP ::= IN NP
	# token
	12-16 T : like <- IN
	17-25 R : a banana <- NP ::= DT NN
	# token
	17-18 T : a <- DT
	# token
	19-25 T : banana <- NN
	# glade 17-25: a banana ---
	- (NP (DT a) (NN banana))

	# glade 12-25: like a banana ---
	- (PP (IN like) (NP (DT a) (NN banana)))

	# glade 6-25: flies like a banana ---
	- (VP (VBZ flies) (PP (IN like) (NP (DT a) (NN banana))))

	# token
	25-26 T : . <- period
	0-11 R : Fruit flies <- NP ::= NN NNS
	# token
	0- 5 T : Fruit <- NN
	# token
	6-11 T : flies <- NNS
	# glade 0-11: Fruit flies ---
	- (NP (NN Fruit) (NNS flies))

	12-25 R : like a banana <- VP ::= VBP NP
	# token
	12-16 T : like <- VBP
	# glade 12-25: like a banana ---
	- (VP (VBP like) (NP (DT a) (NN banana)))

	# glade 0-26: Fruit flies like a banana. ---
	- \|-
	(S (NP (NN Fruit))
	(VP (VBZ flies) (PP (IN like) (NP (DT a) (NN banana))))
	(period .))
	- \|-
	(S (NP (NN Fruit) (NNS flies))
	(VP (VBP like) (NP (DT a) (NN banana)))
	(period .))
	#!/usr/bin/perl
	# Copyright 2013 Jeffrey Kegler
	# This file is part of Marpa::R2. Marpa::R2 is free software: you can
	# redistribute it and/or modify it under the terms of the GNU Lesser
	# General Public License as published by the Free Software Foundation,
	# either version 3 of the License, or (at your option) any later version.
	#
	# Marpa::R2 is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	# Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser
	# General Public License along with Marpa::R2. If not, see
	# http://www.gnu.org/licenses/.

	# This example parses ambiguous English sentences. The target annotation
	# is Penn Treebank's syntactic bracketing tags. For details, see
	# http://www.cis.upenn.edu/~treebank/

	# This example originally came from Ralf Muschall. Ruslan Shvedov
	# reworked my implementation, converting it to the SLIF and
	# Penn Treebank. Ruslan and Ralf clearly know English grammar better than
	# most of us native speakers.

	# 'time', 'fruit', and 'flies' can be nouns or verbs, 'like' can be
	# a preposition or a verb. This creates syntactic ambiguity shown
	# in the parse results.

	# Modifier nouns are not tagged or lexed as adjectives (JJ), because
	# "Nouns that are used as modifiers, whether in isolation or in sequences,
	# should be tagged as nouns (NN, NNS) rather than as adjectives (JJ)."
	# -- ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz

	# The saying "time flies like an arrow; fruit flies like a banana"
	# is attributed to Groucho Marx, but there is no reason to believe
	# he ever said it. Apparently, the saying
	# first appeared on the Usenet on net.jokes in 1982.
	# I've documented this whole thing on Wikipedia:
	# http://en.wikipedia.org/wiki/Time_flies_like_an_arrow
	#
	# The permalink is:
	# http://en.wikipedia.org/w/index.php?title=Time_flies_like_an_arrow&oldid=311163283

	use 5.010;
	use strict;
	use warnings;

	use Marpa::R2;

	use YAML;

	my $grammar = Marpa::R2::Scanless::G->new(
	{
	source => \(<<'END_OF_SOURCE'),

	:default ::= action => [ name, values ]
	lexeme default = action => [ name, value ]

	S ::= NP VP period

	NP ::= NN
	\| JJ NN
	\| DT NN
	\| NN NNS

	VP ::= VBP NP
	\| VBP PP
	\| VBZ PP
	\| VBZ RB

	PP ::= IN NP

	period ~ '.'

	:discard ~ whitespace
	whitespace ~ [\s]+

	DT ~ 'a' \| 'an'
	NN ~ 'arrow' \| 'banana'
	NNS ~ 'flies'
	NNS ~ 'bananas'
	VBZ ~ 'flies'
	NN ~ 'fruit':i
	VBP ~ 'fruit':i
	IN ~ 'like'
	VBP ~ 'like'
	NN ~ 'time':i
	VBP ~ 'time':i
	RB ~ 'fast'
	VBP ~ 'fast'
	JJ ~ 'fast'
	NN ~ 'fast'
	VBP ~ 'spoil'

	END_OF_SOURCE
	}
	);

	my $expected = <<'EOS';
	[To be written]
	EOS

	my $paragraph = <<END_OF_PARAGRAPH;
	Time flies like an arrow.
	Time flies fast.
	Fruit flies like a banana.
	Fruit flies spoil banana.
	END_OF_PARAGRAPH

	my $recce;

	for my $sentence (split /\n/, $paragraph){

	$recce = Marpa::R2::Scanless::R->new( { grammar => $grammar } );

	$recce->read( \$sentence );

	if ( $recce->ambiguity_metric() > 1 ) {

	my $asf = Marpa::R2::ASF->new( { slr => $recce } );
	die 'No ASF' if not defined $asf;
	$asf->traverse( {}, \&traverser );

	}
	}

	sub all_choices{
	my $glade = shift;
	# The results at each position are a list of choices, so
	# to produce a new result list, we need to take a Cartesian
	# product of all the choices
	my $length = $glade->rh_length();
	my @results = ( [] );
	for my $rh_ix ( 0 .. $length - 1 ) {
	my @new_results = ();
	for my $old_result (@results) {
	my $child_value = $glade->rh_value($rh_ix);
	for my $new_value ( @{ $child_value } ) {
	push @new_results, [ @{$old_result}, $new_value ];
	}
	}
	@results = @new_results;
	} ## end for my $rh_ix ( 0 .. $length - 1 )
	return @results;
	}

	sub traverser {

	my ($glade, $scratch) = @_;

	# get rule and symbol IDs
	my $rule_id = $glade->rule_id();

	my $symbol_id = $glade->symbol_id();
	my $symbol_name = $grammar->symbol_name($symbol_id);
	my $display_form = $grammar->symbol_display_form($symbol_id);

	# get span
	my ( $start, $length ) = $glade->span();
	my $literal = $glade->literal();

	# set glade's prefix
	my $prefix = sprintf "%3s-%2s", $start, $start + $length;

	# check for tokens
	if (not defined $rule_id){
	say "# token\n", $prefix, " T : ", sprintf("%-30s", $literal), " <- $display_form";
	return ["($symbol_name $literal)"];
	}

	# get rule expansion
	unless ( $symbol_name eq '[:start]' ){
	my ($lhs_id, @rhs_ids) =
	map { $grammar->symbol_display_form($_) }
	$grammar->rule_expand($rule_id);
	my $rule = "$lhs_id ::= " . join q{ }, @rhs_ids;

	# rule
	say $prefix, " R : ", sprintf("%-30s", $literal), " <- $rule";
	}

	my @return_value;

	CHOICE: while (1) {

	# The results at each position are a list of choices, so
	# to produce a new result list, we need to take a Cartesian
	# product of all the choices
	my @results = all_choices( $glade );

	# Special case for the start rule
	if ( $symbol_name eq '[:start]' ) {
	my $retval = [ map { join q{}, @{$_} } @results ];
	# say Dump $retval;
	return $retval;
	}

	# Now we have a list of choices, as a list of lists. Each sub list
	# is a list of tokens, which we need to join into
	# a single token. The result will be to collapse
	# one level of lists, and leave us with a list of Penn-tagged
	# elements
	my $join_ws = q{ };
	$join_ws = qq{\n } if $symbol_name eq 'S';
	my $push_val = [ map { '(' . $symbol_name . q{ } . ( join $join_ws, @{$_} ) . ')' }
	@results ];
	# say "# choice$prefix ", Dump $push_val;
	push @return_value, @$push_val;

	# Look at the next alternative in this glade, or end the
	# loop if there is none
	last CHOICE if not defined $glade->next();

	} ## end CHOICE: while (1)

	# Return the list of tokens for this glade
	say "# glade $prefix: $literal ", Dump \@return_value;
	return \@return_value;

	} ## end sub traverser