Skip to content

Instantly share code, notes, and snippets.

@hernan604
Forked from rns/timeflies_q25903112.out
Created December 12, 2023 01:03
Show Gist options
  • Save hernan604/0c965f692131886ca23bec06068688dc to your computer and use it in GitHub Desktop.
Save hernan604/0c965f692131886ca23bec06068688dc to your computer and use it in GitHub Desktop.
0-25 R : Time flies like an arrow. <- S ::= NP VP period
0- 4 R : Time <- NP ::= NN
# token
0- 4 T : Time <- NN
# glade 0- 4: Time ---
- (NP (NN Time))
5-24 R : flies like an arrow <- VP ::= VBZ PP
# token
5-10 T : flies <- VBZ
11-24 R : like an arrow <- PP ::= IN NP
# token
11-15 T : like <- IN
16-24 R : an arrow <- NP ::= DT NN
# token
16-18 T : an <- DT
# token
19-24 T : arrow <- NN
# glade 16-24: an arrow ---
- (NP (DT an) (NN arrow))
# glade 11-24: like an arrow ---
- (PP (IN like) (NP (DT an) (NN arrow)))
# glade 5-24: flies like an arrow ---
- (VP (VBZ flies) (PP (IN like) (NP (DT an) (NN arrow))))
# token
24-25 T : . <- period
0-10 R : Time flies <- NP ::= NN NNS
# token
0- 4 T : Time <- NN
# token
5-10 T : flies <- NNS
# glade 0-10: Time flies ---
- (NP (NN Time) (NNS flies))
11-24 R : like an arrow <- VP ::= VBP NP
# token
11-15 T : like <- VBP
# glade 11-24: like an arrow ---
- (VP (VBP like) (NP (DT an) (NN arrow)))
# glade 0-25: Time flies like an arrow. ---
- |-
(S (NP (NN Time))
(VP (VBZ flies) (PP (IN like) (NP (DT an) (NN arrow))))
(period .))
- |-
(S (NP (NN Time) (NNS flies))
(VP (VBP like) (NP (DT an) (NN arrow)))
(period .))
0-26 R : Fruit flies like a banana. <- S ::= NP VP period
0- 5 R : Fruit <- NP ::= NN
# token
0- 5 T : Fruit <- NN
# glade 0- 5: Fruit ---
- (NP (NN Fruit))
6-25 R : flies like a banana <- VP ::= VBZ PP
# token
6-11 T : flies <- VBZ
12-25 R : like a banana <- PP ::= IN NP
# token
12-16 T : like <- IN
17-25 R : a banana <- NP ::= DT NN
# token
17-18 T : a <- DT
# token
19-25 T : banana <- NN
# glade 17-25: a banana ---
- (NP (DT a) (NN banana))
# glade 12-25: like a banana ---
- (PP (IN like) (NP (DT a) (NN banana)))
# glade 6-25: flies like a banana ---
- (VP (VBZ flies) (PP (IN like) (NP (DT a) (NN banana))))
# token
25-26 T : . <- period
0-11 R : Fruit flies <- NP ::= NN NNS
# token
0- 5 T : Fruit <- NN
# token
6-11 T : flies <- NNS
# glade 0-11: Fruit flies ---
- (NP (NN Fruit) (NNS flies))
12-25 R : like a banana <- VP ::= VBP NP
# token
12-16 T : like <- VBP
# glade 12-25: like a banana ---
- (VP (VBP like) (NP (DT a) (NN banana)))
# glade 0-26: Fruit flies like a banana. ---
- |-
(S (NP (NN Fruit))
(VP (VBZ flies) (PP (IN like) (NP (DT a) (NN banana))))
(period .))
- |-
(S (NP (NN Fruit) (NNS flies))
(VP (VBP like) (NP (DT a) (NN banana)))
(period .))
#!/usr/bin/perl
# Copyright 2013 Jeffrey Kegler
# This file is part of Marpa::R2. Marpa::R2 is free software: you can
# redistribute it and/or modify it under the terms of the GNU Lesser
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# Marpa::R2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with Marpa::R2. If not, see
# http://www.gnu.org/licenses/.
# This example parses ambiguous English sentences. The target annotation
# is Penn Treebank's syntactic bracketing tags. For details, see
# http://www.cis.upenn.edu/~treebank/
# This example originally came from Ralf Muschall. Ruslan Shvedov
# reworked my implementation, converting it to the SLIF and
# Penn Treebank. Ruslan and Ralf clearly know English grammar better than
# most of us native speakers.
# 'time', 'fruit', and 'flies' can be nouns or verbs, 'like' can be
# a preposition or a verb. This creates syntactic ambiguity shown
# in the parse results.
# Modifier nouns are not tagged or lexed as adjectives (JJ), because
# "Nouns that are used as modifiers, whether in isolation or in sequences,
# should be tagged as nouns (NN, NNS) rather than as adjectives (JJ)."
# -- ftp://ftp.cis.upenn.edu/pub/treebank/doc/tagguide.ps.gz
# The saying "time flies like an arrow; fruit flies like a banana"
# is attributed to Groucho Marx, but there is no reason to believe
# he ever said it. Apparently, the saying
# first appeared on the Usenet on net.jokes in 1982.
# I've documented this whole thing on Wikipedia:
# http://en.wikipedia.org/wiki/Time_flies_like_an_arrow
#
# The permalink is:
# http://en.wikipedia.org/w/index.php?title=Time_flies_like_an_arrow&oldid=311163283
use 5.010;
use strict;
use warnings;
use Marpa::R2;
use YAML;
my $grammar = Marpa::R2::Scanless::G->new(
{
source => \(<<'END_OF_SOURCE'),
:default ::= action => [ name, values ]
lexeme default = action => [ name, value ]
S ::= NP VP period
NP ::= NN
| JJ NN
| DT NN
| NN NNS
VP ::= VBP NP
| VBP PP
| VBZ PP
| VBZ RB
PP ::= IN NP
period ~ '.'
:discard ~ whitespace
whitespace ~ [\s]+
DT ~ 'a' | 'an'
NN ~ 'arrow' | 'banana'
NNS ~ 'flies'
NNS ~ 'bananas'
VBZ ~ 'flies'
NN ~ 'fruit':i
VBP ~ 'fruit':i
IN ~ 'like'
VBP ~ 'like'
NN ~ 'time':i
VBP ~ 'time':i
RB ~ 'fast'
VBP ~ 'fast'
JJ ~ 'fast'
NN ~ 'fast'
VBP ~ 'spoil'
END_OF_SOURCE
}
);
my $expected = <<'EOS';
[To be written]
EOS
my $paragraph = <<END_OF_PARAGRAPH;
Time flies like an arrow.
Time flies fast.
Fruit flies like a banana.
Fruit flies spoil banana.
END_OF_PARAGRAPH
my $recce;
for my $sentence (split /\n/, $paragraph){
$recce = Marpa::R2::Scanless::R->new( { grammar => $grammar } );
$recce->read( \$sentence );
if ( $recce->ambiguity_metric() > 1 ) {
my $asf = Marpa::R2::ASF->new( { slr => $recce } );
die 'No ASF' if not defined $asf;
$asf->traverse( {}, \&traverser );
}
}
sub all_choices{
my $glade = shift;
# The results at each position are a list of choices, so
# to produce a new result list, we need to take a Cartesian
# product of all the choices
my $length = $glade->rh_length();
my @results = ( [] );
for my $rh_ix ( 0 .. $length - 1 ) {
my @new_results = ();
for my $old_result (@results) {
my $child_value = $glade->rh_value($rh_ix);
for my $new_value ( @{ $child_value } ) {
push @new_results, [ @{$old_result}, $new_value ];
}
}
@results = @new_results;
} ## end for my $rh_ix ( 0 .. $length - 1 )
return @results;
}
sub traverser {
my ($glade, $scratch) = @_;
# get rule and symbol IDs
my $rule_id = $glade->rule_id();
my $symbol_id = $glade->symbol_id();
my $symbol_name = $grammar->symbol_name($symbol_id);
my $display_form = $grammar->symbol_display_form($symbol_id);
# get span
my ( $start, $length ) = $glade->span();
my $literal = $glade->literal();
# set glade's prefix
my $prefix = sprintf "%3s-%2s", $start, $start + $length;
# check for tokens
if (not defined $rule_id){
say "# token\n", $prefix, " T : ", sprintf("%-30s", $literal), " <- $display_form";
return ["($symbol_name $literal)"];
}
# get rule expansion
unless ( $symbol_name eq '[:start]' ){
my ($lhs_id, @rhs_ids) =
map { $grammar->symbol_display_form($_) }
$grammar->rule_expand($rule_id);
my $rule = "$lhs_id ::= " . join q{ }, @rhs_ids;
# rule
say $prefix, " R : ", sprintf("%-30s", $literal), " <- $rule";
}
my @return_value;
CHOICE: while (1) {
# The results at each position are a list of choices, so
# to produce a new result list, we need to take a Cartesian
# product of all the choices
my @results = all_choices( $glade );
# Special case for the start rule
if ( $symbol_name eq '[:start]' ) {
my $retval = [ map { join q{}, @{$_} } @results ];
# say Dump $retval;
return $retval;
}
# Now we have a list of choices, as a list of lists. Each sub list
# is a list of tokens, which we need to join into
# a single token. The result will be to collapse
# one level of lists, and leave us with a list of Penn-tagged
# elements
my $join_ws = q{ };
$join_ws = qq{\n } if $symbol_name eq 'S';
my $push_val = [ map { '(' . $symbol_name . q{ } . ( join $join_ws, @{$_} ) . ')' }
@results ];
# say "# choice$prefix ", Dump $push_val;
push @return_value, @$push_val;
# Look at the next alternative in this glade, or end the
# loop if there is none
last CHOICE if not defined $glade->next();
} ## end CHOICE: while (1)
# Return the list of tokens for this glade
say "# glade $prefix: $literal ", Dump \@return_value;
return \@return_value;
} ## end sub traverser
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment