#!/usr/bin/perl -w # Copyright 2012 Jeffrey Kegler # This file is part of Marpa::PP. Marpa::PP is free software: you can # redistribute it and/or modify it under the terms of the GNU Lesser # General Public License as published by the Free Software Foundation, # either version 3 of the License, or (at your option) any later version. # # Marpa::PP is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser # General Public License along with Marpa::PP. If not, see # http://www.gnu.org/licenses/. use 5.010; use strict; use warnings; use charnames ':full'; use Scalar::Util; use Data::Dumper (); use English qw( -no_match_vars ); use Test::More (); use lib 'pperl'; BEGIN { my $PPI_problem; CHECK_PPI: { if ( not eval { require PPI } ) { $PPI_problem = 'PPI not installed'; last CHECK_PPI; } if ( not PPI->VERSION(1.206) ) { $PPI_problem = 'PPI 1.206 not installed'; } } ## end CHECK_PPI: if ($PPI_problem) { Test::More::plan skip_all => $PPI_problem; } else { Test::More::plan tests => 14; } Test::More::use_ok('Marpa::PP'); Test::More::use_ok('Marpa::PP::Perl'); } ## end BEGIN our @OUTPUT = (); our %SYMTAB = ( SCALAR => {} ); sub DEBUG_dump { say {*STDERR} 'DEBUG: ', join "\n", @main::OUTPUT or die "Cannot print to STDERR: $ERRNO"; say {*STDERR} 'DEBUG: Symbol table: ', Data::Dumper::Dumper( \%SYMTAB ) or die "Cannot print to STDERR: $ERRNO"; return; } ## end sub DEBUG_dump # This code is about Perl GRAMMAR. # If you're writing # a Perl SEMANTICS, and looking for a place to start, # you probably don't want to start here. # The purpose of these semantics is to test the grammar -- no more. # They are probably good for nothing else. # # Here are some of the defects: # # 1. Not a 'safe' evaluator for code from untrusted sources. # 'eval' is used to interpret the string constants. # # 2. Most Perl semantics is not implementation and where # the implementation exists it often is at the toy level. # Basically, anything not needed to interpret # Data::Dumper output is ignored. # # 3. No optimization. It's fast enough for a test suite. # # 4. Etc., etc., etc. You get the idea. sub coerce_to_R { my ($tagged) = @_; my ( $side, $v ) = @{$tagged}; return $side eq 'R' ? $v : ${$v}; } sub do_term_lstop { my ( undef, $lstop, $list_tagged ) = @_; die "Unimplemented lstop: $lstop" if $lstop ne 'bless'; my $list_ref = coerce_to_R($list_tagged); return [ 'L', \\( bless $list_ref->[0], $list_ref->[1] ) ]; } ## end sub do_term_lstop # term_hi : term_hi ARROW '{' expr ';' '}' ; term_hi__arrow_hash /* somehref->{bar();} */ sub do_term_hi__arrow_hash { my ( undef, $term, undef, undef, $element ) = @_; my $element_ref = coerce_to_R($element); my $element_ref_type = Scalar::Util::reftype $element_ref; die "element in term->[element] is not an scalar: $element_ref_type" if $element_ref_type ne 'SCALAR'; my ( $term_side, $term_ref ) = @{$term}; if ( $term_side eq 'L' ) { $term_ref = ${$term_ref}; } if ( ( my $ref_type = Scalar::Util::reftype $term_ref) ne 'REF' or ( my $ref_ref_type = Scalar::Util::reftype ${$term_ref} ) ne 'HASH' ) { my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type; die "term in term->[element] is not an array ref: it is $type"; } ## end if ( ( my $ref_type = Scalar::Util::reftype $term_ref...)) return [ 'L', \\( ${$term_ref}->{ ${$element_ref} } ) ]; } ## end sub do_term_hi__arrow_hash # term_hi : term_hi ARROW '[' expr ']' ; term_hi__arrow_array /* somearef->[$element] */ sub do_term_hi__arrow_array { my ( undef, $term, undef, undef, $element ) = @_; my $element_ref = coerce_to_R($element); my $element_ref_type = Scalar::Util::reftype $element_ref; die "element in term->[element] is not an scalar: $element_ref_type" if $element_ref_type ne 'SCALAR'; my ( $term_side, $term_ref ) = @{$term}; if ( $term_side eq 'L' ) { $term_ref = ${$term_ref}; } if ( ( my $ref_type = Scalar::Util::reftype $term_ref) ne 'REF' or ( my $ref_ref_type = Scalar::Util::reftype ${$term_ref} ) ne 'ARRAY' ) { my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type; die "term in term->[element] is not an array ref: it is $type"; } ## end if ( ( my $ref_type = Scalar::Util::reftype $term_ref...)) return [ 'L', \\( ${$term_ref}->[ ${$element_ref} ] ) ]; } ## end sub do_term_hi__arrow_array # term_hi : scalar '{' expr ';' '}' ; hash_index /* $foo->{bar();} */ # term_hi : term_hi '{' expr ';' '}' ; hash_index_r /* $foo->[bar]->{baz;} */ sub do_hash_index { my ( undef, $term, undef, $element ) = @_; my $element_ref = coerce_to_R($element); my $element_ref_type = Scalar::Util::reftype $element_ref; die "element in term->[element] is not an scalar: $element_ref_type" if $element_ref_type ne 'SCALAR'; my ( $term_side, $term_ref ) = @{$term}; if ( $term_side eq 'R' ) { die 'rvalue term in scalar[element] not implemented'; } if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref} ) ne 'REF' or ( my $ref_ref_type = Scalar::Util::reftype ${ ${$term_ref} } ) ne 'HASH' ) { my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type; die "scalar in scalar[element] is not an hash ref: it is $type"; } ## end if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref...})) return [ 'L', \\( ${ ${$term_ref} }->{ ${$element_ref} } ) ]; } ## end sub do_hash_index sub do_array_index { my ( undef, $term, undef, $element ) = @_; my $element_ref = coerce_to_R($element); my $element_ref_type = Scalar::Util::reftype $element_ref; die "element in term->[element] is not an scalar: $element_ref_type" if $element_ref_type ne 'SCALAR'; my ( $term_side, $term_ref ) = @{$term}; if ( $term_side eq 'R' ) { die 'rvalue term in scalar[element] not implemented'; } if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref} ) ne 'REF' or ( my $ref_ref_type = Scalar::Util::reftype ${ ${$term_ref} } ) ne 'ARRAY' ) { my $type = $ref_type eq 'REF' ? "REF to $ref_ref_type" : $ref_type; die "scalar in scalar[element] is not an hash ref: it is $type"; } ## end if ( ( my $ref_type = Scalar::Util::reftype ${$term_ref...})) return [ 'L', \\( ${ ${$term_ref} }->[ ${$element_ref} ] ) ]; } ## end sub do_array_index sub do_argexpr { my ( undef, $argexpr, undef, $term ) = @_; my $argexpr_ref = coerce_to_R($argexpr); my @result; given ( Scalar::Util::reftype $argexpr_ref) { when ('REF') { push @result, ${$argexpr_ref} } when ('SCALAR') { push @result, ${$argexpr_ref} } when ('ARRAY') { push @result, @{$argexpr_ref} } when ('HASH') { push @result, %{$argexpr_ref} } default { die "Unknown argexpr type: $_" } } ## end given my $term_ref = coerce_to_R($term); given ( Scalar::Util::reftype $term_ref) { when ('REF') { push @result, ${$term_ref} } when ('SCALAR') { push @result, ${$term_ref} } when ('ARRAY') { push @result, @{$term_ref} } when ('HASH') { push @result, %{$term_ref} } default { die "Unknown term type: $_" } } ## end given return [ 'L', \\@result ]; } ## end sub do_argexpr # scalar assignment only sub do_assign { my ( undef, $lhs, undef, $rhs ) = @_; my ( $side, $lhs_ref ) = @{$lhs}; my $rhs_ref = coerce_to_R($rhs); # If the LHS is actually an rvalue, # it is the name of a variable # passed up from a 'scalar' rule. # In this 'toy' semantics, that's how # variables are "declared". if ( $side eq 'R' ) { my $name = ${$lhs_ref}; if ( not defined $name or ref $name ) { die 'assignment to non-lvalue: ', Data::Dumper::Dumper($name); } my $v = ${$rhs_ref}; $SYMTAB{SCALAR}->{$name} = \$v; $lhs_ref = \( $SYMTAB{SCALAR}->{$name} ); return [ 'L', $lhs_ref ]; } ## end if ( $side eq 'R' ) if ( Scalar::Util::readonly ${ ${$lhs_ref} } ) { die 'lhs is read only!'; } ${ ${$lhs_ref} } = ${$rhs_ref}; return [ 'L', $lhs_ref ]; } ## end sub do_assign sub do_THING { my ( undef, $value ) = @_; return [ 'R', \$value ]; } sub do_anon_array { my ( undef, undef, $expr ) = @_; my $value_ref = coerce_to_R($expr); my @result = (); given ( Scalar::Util::reftype $value_ref) { when ('SCALAR') { push @result, ${$value_ref} } when ('REF') { push @result, ${$value_ref} } when ('ARRAY') { push @result, @{$value_ref} } when ('HASH') { push @result, %{$value_ref} } default { die "Unknown expr type: $_" } } ## end given return [ 'L', \\[@result] ]; } ## end sub do_anon_array sub do_anon_empty_array { return [ 'L', \\[] ]; } sub do_anon_hash { my ( undef, undef, $expr ) = @_; my $value_ref = coerce_to_R($expr); my $result; given ( Scalar::Util::reftype $value_ref) { when ('REF') { die 'expr for anon hash cannot be REF' } when ('SCALAR') { die 'expr for anon hash cannot be SCALAR' } when ('ARRAY') { $result = { @{$value_ref} } } when ('HASH') { $result = \%{$value_ref} } default { die "Unknown expr type: $_" } } ## end given return [ 'R', \$result ]; } ## end sub do_anon_hash sub do_anon_empty_hash { return [ 'R', \{} ]; } # This assume that all 'my' variables # are just ways to create # undef lvalue's -- which is how # Data::Dumper uses them sub do_term_my { my $v = undef; return [ 'L', \\$v ]; } # Very simplified here -- # References are dereferenced and passed up. # All scalars not # already defined are returned as strings. # It is assumed that they will either be the only # thing on the LHS of an assignment, or in # a my declaration. Data::Dumper uses my # declarations to create undef's so the scalar # names # that go up to term_my's will be thrown away. sub do_scalar { my ( undef, $dollar, $tagged_ob ) = @_; my ( $side, $ob_ref ) = @{$tagged_ob}; if ( $side eq 'R' ) { my $name = ${$ob_ref}; my $scalars = $SYMTAB{SCALAR}; if ( exists $scalars->{$name} ) { return [ 'L', \$scalars->{$name} ]; } return [ 'R', \$name ]; } ## end if ( $side eq 'R' ) $ob_ref = ${$ob_ref}; my $ob = ${$ob_ref}; if ( ref $ob ) { return [ 'L', \$ob ]; } return [ 'R', $ob ]; } ## end sub do_scalar sub do_uniop { my ( undef, $op ) = @_; die "Unknown uniop: $op" if $op ne 'undef'; return [ 'R', \undef ]; } # refgen is always an rvalue sub do_refgen { my ( undef, undef, $s1 ) = @_; return [ 'R', \coerce_to_R($s1) ]; } # prog should always return an rvalue sub do_prog { my ( undef, $s1 ) = @_; return [ 'R', coerce_to_R($s1) ]; } sub symbol_1 { my ( undef, $s1 ) = @_; return $s1; } sub symbol_2 { my ( undef, undef, $s2 ) = @_; return $s2; } sub token_1 { my ( undef, $a ) = @_; return [ 'R', \$a ]; } my %unwrapped = ( and_expr__t => \&symbol_1, anon_empty_hash => \&do_anon_empty_hash, anon_hash => \&do_anon_hash, argexpr__comma => \&symbol_1, argexpr => \&do_argexpr, argexpr__t => \&symbol_1, array_index => \&do_array_index, array_index_r => \&do_array_index, block => \&symbol_2, do_block => \&symbol_2, expr => \&symbol_1, hash_index => \&do_hash_index, hash_index_r => \&do_hash_index, indirob__block => \&symbol_1, indirob__WORD => \&token_1, lineseq__line => \&symbol_2, line__sideff => \&symbol_2, listexpr => \&symbol_1, myterm_scalar => \&symbol_1, or_expr__t => \&symbol_1, prog => \&do_prog, refgen => \&do_refgen, scalar => \&do_scalar, sideff => \&symbol_1, term_addop__t => \&symbol_1, term_andand__t => \&symbol_1, term_arrow__t => \&symbol_1, term_assign => \&do_assign, term_assign_lstop => \&do_assign, term_assign__t => \&symbol_1, term_bitandop__t => \&symbol_1, term_bitorop__t => \&symbol_1, term_cond__t => \&symbol_1, term_dotdot__t => \&symbol_1, term_eqop__t => \&symbol_1, term_hi__anon_array => \&do_anon_array, term_hi__anon_empty_array => \&do_anon_empty_array, term_hi__arrow_array => \&do_term_hi__arrow_array, term_hi__arrow_hash => \&do_term_hi__arrow_hash, term_hi__parens => \&symbol_2, term_hi__scalar => \&symbol_1, term_hi__subscripted => \&symbol_1, term_hi__THING => \&do_THING, term_increment__t => \&symbol_1, term_listop__t => \&symbol_1, term_lstop => \&do_term_lstop, term_matchop__t => \&symbol_1, term_mulop__t => \&symbol_1, term_my => \&do_term_my, term_notop__t => \&symbol_1, term_oror__t => \&symbol_1, term_powop__t => \&symbol_1, term_relop__t => \&symbol_1, term_require__t => \&symbol_1, term_shiftop__t => \&symbol_1, term__t => \&symbol_1, term_uminus__t => \&symbol_1, term_uniop__t => \&symbol_1, uniop => \&do_uniop, ); sub gen_closure { my ( $lhs, $rhs, $action ) = @_; my $closure = $unwrapped{$action}; die "lhs=$lhs: $closure is not a closure" if defined $closure and ref $closure ne 'CODE'; return sub { if ( not defined $closure ) { die 'No action defined for ', "$lhs ::= " . ( join q{ }, map { $_ // q{-} } @{$rhs} ); } my $v = $closure->(@_); local $Data::Dumper::Terse = 1; local $Data::Dumper::Indent = 0; # local $Data::Dumper::Maxdepth = 4; push @main::OUTPUT, "$lhs ::= " . ( join q{ }, map { $_ // q{-} } @{$rhs} ) . q{; } . Data::Dumper::Dumper( \$v ); $v; }; } ## end sub gen_closure my %symbol = (); my %closure = (); ## Tests from dumper.t my $parser = Marpa::PP::Perl->new( \&gen_closure ); # Perlcritic cannot figure out that $a and $b are not magic variables # for a sort comparison # Trivial if (1) { my $a = 1; test( [$a], [qw(a)] ); } if (1) { my @c = ('c'); my $c = \@c; my $b = {}; my $a = [ 1, $b, $c ]; $b->{a} = $a; $b->{b} = $a->[1]; $b->{c} = $a->[2]; test( [ $a, $b, $c ], [qw(a b c)] ); } ## end if (1) if (1) { my $foo = { "abc\N{NULL}\'\efg" => "mno\N{NULL}", 'reftest' => \\1, }; test( [$foo], [qw($foo)] ); } ## end if (1) if (1) { my $foo = 5; my @foo = ( -10, \$foo ); my %foo = ( a => 1, b => \$foo, c => \@foo ); $foo{d} = \%foo; $foo[2] = \%foo; test( [ \%foo ], [qw($foo)] ); } ## end if (1) if (1) { my @dogs = qw( Fido Wags ); my %kennel = ( First => \$dogs[0], Second => \$dogs[1], ); $dogs[2] = \%kennel; my $mutts = \%kennel; eval { test( [ \@dogs, \%kennel, $mutts ], [qw($dogs $kennel $mutts)] ); 1; } or die "Eval failed: $EVAL_ERROR"; } ## end if (1) if (1) { my $a = []; $a->[1] = \$a->[0]; test( [$a], [qw($a)] ); } if (1) { my $a = \\\\\'foo'; my $b = ${ ${$a} }; test( [ $a, $b ], [qw($a $b)] ); } if (1) { ## no critic (Variables::RequireLocalizedPunctuationVars) my $b; my $a = [ { a => \$b }, { b => undef } ]; $b = [ { c => \$b }, { d => \$a } ]; test( [ $a, $b ], [qw($a $b)] ); } ## end if (1) if (1) { my $a = [ [ [ [ \\\\\'foo' ] ] ] ]; my $b = $a->[0][0]; my $c = ${ ${ $b->[0][0] } }; test( [ $a, $b, $c ], [qw($a $b $c)] ); } ## end if (1) if (1) { my $f = 'pearl'; my $e = [$f]; my $d = { 'e' => $e }; my $c = [$d]; my $b = { 'c' => $c }; my $a = { 'b' => $b }; test( [ $a, $b, $c, $d, $e, $f ], [qw($a $b $c $d $e $f)] ); } ## end if (1) if (1) { ## no critic (Variables::RequireLocalizedPunctuationVars) my $a; $a = \$a; my $b = [$a]; test( [$b], [qw($b)] ); } ## end if (1) ## Test from Randal Schwartz if (1) { my $x = bless { fred => 'flintstone' }, 'x'; my $y = bless \$x, 'y'; test( [ $x, $y ], [qw($x $y)] ); } ## no critic (Subroutines::RequireArgUnpacking) sub test { my $input = Data::Dumper->new(@_)->Purity(1)->Sortkeys(1)->Dumpxs; # Table by type and name of data # All data is kept as refs. # For orthogonality, that includes scalars. %SYMTAB = (); @OUTPUT = (); my $value_ref = $parser->parse( \$input ); if ( not defined $value_ref ) { die 'Perl parse failed'; } my @pointers = (); my @names = (); for my $type ( sort keys %SYMTAB ) { my $sigil = $type eq 'SCALAR' ? q{$} : $type eq 'REF' ? q{$} : $type eq 'ARRAY' ? q{@} : $type eq 'HASH' ? q{@} : q{!}; my $symbols_by_name = $SYMTAB{$type}; for my $name ( sort keys %{$symbols_by_name} ) { my $ref = $symbols_by_name->{$name}; # The testing convention is to pass scalars directly $type eq 'SCALAR' and $ref = ${$ref}; push @pointers, $ref; push @names, "$sigil$name"; } ## end for my $name ( sort keys %{$symbols_by_name} ) } ## end for my $type ( sort keys %SYMTAB ) my $output = Data::Dumper->new( \@pointers, \@names )->Purity(1)->Sortkeys(1) ->Dumpxs; Test::More::is( $output, $input ); return; } ## end sub test ## use critic 1; # In case used as "do" file