#!/usr/bin/perl -w use strict; use Parse::Eyapp::YATW; use Parse::Eyapp::Node; use Parse::Eyapp::Treeregexp; use Carp; use Getopt::Long; use Pod::Usage; my $infile; my $outfile; my $packagename; my $prefix = ''; my $syntax = 1; my $numbers = 1; my @PERL5LIB; # search path my $severity = 0; # 0 = Don't check arity. 1 = Check arity. 2 = Check arity and give a warning 3 = ... and croak GetOptions( 'in=s' => \$infile, 'out=s' => \$outfile, 'mod=s' => \$packagename, 'prefix=s' => \$prefix, 'severity=i' => \$severity, 'syntax!' => \$syntax, 'numbers!' => \$numbers, 'lib=s' => \@PERL5LIB, 'version' => \&version, 'usage' => \&usage, 'help' => \&man, ) or croak usage(); # filename packagename outputfile ($infile) = @ARGV unless defined($infile); die usage() unless defined($infile); $infile = "$infile.trg" unless -r $infile; unless (defined($outfile)) { if ($infile =~ /(.*)\.trg$/) { $outfile = "$1.pm"; } else { $outfile = "$infile.pm" } } my $treeparser = Parse::Eyapp::Treeregexp->new( INFILE => $infile, OUTPUTFILE => $outfile, PACKAGE => $packagename, PREFIX => $prefix, SYNTAX => $syntax, NUMBERS => $numbers, PERL5LIB => \@PERL5LIB, SEVERITY => $severity ); $treeparser->generate(); ###### Support subroutines ###### sub version { if (defined($Parse::Eyapp::Treeregparser::VERSION)) { print "Version $Parse::Eyapp::Treeregparser::VERSION\n"; } else { print "Unknown version\n"; } exit; } sub usage { print <<"END_ERR"; Supply the name of a file containing a tree grammar (.trg) Usage is: treereg [-m packagename] [[no]syntax] [[no]numbers] [-severity 0|1|2|3] \ [-p treeprefix] [-o outputfile] -i filename[.trg] END_ERR exit; } sub man { pod2usage( -exitval => 1, -verbose => 2 ); } __END__ =head1 NAME treereg - Compiler for Tree Regular Expressions =head1 SYNOPSIS treereg [-m packagename] [[no]syntax] [[no]numbers] [-severity 0|1|2|3] \ [-p treeprefix] [-o outputfile] [-lib /path/to/library/] -i filename[.trg] treereg [-m packagename] [[no]syntax] [[no]numbers] [-severity 0|1|2|3] \ [-p treeprefix] [-lib /path/to/library/] [-o outputfile] filename[.trg] treereg -v treereg -h =head1 OPTIONS Options can be used both with one dash and double dash. It is not necessary to write the full name of the option. A disambiguation prefix suffices. =over =item * C<-i[n] filename> Input file. Extension C<.trg> is assumed if no extension is provided. =item * C<-o[ut] filename> Output file. By default is the name of the input file (concatenated with .pm) =item * C<-m[od] packagename> Name of the package containing the generated subroutines. By default is the longest prefix of the input file name that conforms to the classic definition of integer C<[a-z_A-Z]\w*>. =item * C<-l[ib] /path/to/library/> Specifies that C will be included in C<@INC>. Useful when the C option is on. Can be inserted as many times as necessary. =item * C<-p[refix] treeprefix> Tree nodes automatically generated using C are objects blessed into the name of the production. To avoid crashes the programmer may prefix the class names with a given prefix when calling the parser; for example: $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, yyprefix => __PACKAGE__."::") The C<-prefix treeprefix> option simplifies the process of writing the tree grammar so that instead of writing with the full names CLASS::TIMES(CLASS::NUM, $x) and { $NUM->{VAL} == 0) => { $NUM } it can be written: TIMES(NUM, $x) and { $NUM->{VAL} == 0) => { $NUM } =item * C<-n[umbers]> Produces C<#line> directives. =item * C<-non[umbers]> Disable source file line numbering embedded in your parser =item * C<-sy[ntax]> Checks that Perl code is syntactically correct. =item * C<-nosy[ntax]> Does not check the syntax of Perl code =item * C<-se[verity] number> =over 2 =item - 0 = Don't check arity (default). Matching does not check the arity. The actual node being visited may have more children. =item - 1 = Check arity. Matching requires the equality of the number of children and the actual node and the pattern. =item - 2 = Check arity and give a warning =item - 3 = Check arity, give a warning and exit =back =item * C<-v[ersion]> Gives the version =item * C<-u[sage]> Prints the usage info =item * C<-h[elp]> Print this help =back =head1 DESCRIPTION C translates a tree grammar specification file (default extension C<.trg> describing a set of tree patterns and the actions to modify them using tree-terms like: TIMES(NUM, $x) and { $NUM->{VAL} == 0) => { $NUM } which says that wherever an abstract syntax tree representing the product of a numeric expression with value 0 times any other kind of expression, the C tree can be substituted by its left child. The compiler produces a Perl module containing the subroutines implementing those sets of pattern-actions. =head1 EXAMPLE Consider the following C grammar (see the C documentation to know more about C grammars): ---------------------------------------------------------- nereida:~/LEyapp/examples> cat Rule6.yp %{ use Data::Dumper; %} %right '=' %left '-' '+' %left '*' '/' %left NEG %tree %% line: exp { $_[1] } ; exp: %name NUM NUM | %name VAR VAR | %name ASSIGN VAR '=' exp | %name PLUS exp '+' exp | %name MINUS exp '-' exp | %name TIMES exp '*' exp | %name DIV exp '/' exp | %name UMINUS '-' exp %prec NEG | '(' exp ')' { $_[2] } /* Let us simplify a bit the tree */ ; %% sub _Error { die "Syntax error.\n"; } sub _Lexer { my($parser)=shift; $parser->YYData->{INPUT} or $parser->YYData->{INPUT} = or return('',undef); $parser->YYData->{INPUT}=~s/^\s+//; for ($parser->YYData->{INPUT}) { s/^([0-9]+(?:\.[0-9]+)?)// and return('NUM',$1); s/^([A-Za-z][A-Za-z0-9_]*)// and return('VAR',$1); s/^(.)//s and return($1,$1); } } sub Run { my($self)=shift; $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error ); } ---------------------------------------------------------- Compile it using C: ---------------------------------------------------------- nereida:~/LEyapp/examples> eyapp Rule6.yp nereida:~/LEyapp/examples> ls -ltr | tail -1 -rw-rw---- 1 pl users 4976 2006-09-15 19:56 Rule6.pm ---------------------------------------------------------- Now consider this tree grammar: ---------------------------------------------------------- nereida:~/LEyapp/examples> cat Transform2.trg %{ my %Op = (PLUS=>'+', MINUS => '-', TIMES=>'*', DIV => '/'); %} fold: 'TIMES|PLUS|DIV|MINUS':bin(NUM($n), NUM($m)) => { my $op = $Op{ref($bin)}; $n->{attr} = eval "$n->{attr} $op $m->{attr}"; $_[0] = $NUM[0]; } zero_times_whatever: TIMES(NUM($x), .) and { $x->{attr} == 0 } => { $_[0] = $NUM } whatever_times_zero: TIMES(., NUM($x)) and { $x->{attr} == 0 } => { $_[0] = $NUM } /* rules related with times */ times_zero = zero_times_whatever whatever_times_zero; ---------------------------------------------------------- Compile it with C: ---------------------------------------------------------- nereida:~/LEyapp/examples> treereg Transform2.trg nereida:~/LEyapp/examples> ls -ltr | tail -1 -rw-rw---- 1 pl users 1948 2006-09-15 19:57 Transform2.pm ---------------------------------------------------------- The following program makes use of both modules C and C: ---------------------------------------------------------- nereida:~/LEyapp/examples> cat foldand0rule6_3.pl #!/usr/bin/perl -w use strict; use Rule6; use Parse::Eyapp::YATW; use Data::Dumper; use Transform2; $Data::Dumper::Indent = 1; my $parser = new Rule6(); my $t = $parser->Run; print "\n***** Before ******\n"; print Dumper($t); $t->s(@Transform2::all); print "\n***** After ******\n"; print Dumper($t); ---------------------------------------------------------- When the program runs with input C produces the following output: ---------------------------------------------------------- nereida:~/LEyapp/examples> foldand0rule6_3.pl b*(2-2) ***** Before ****** $VAR1 = bless( { 'children' => [ bless( { 'children' => [ bless( { 'children' => [], 'attr' => 'b', 'token' => 'VAR' }, 'TERMINAL' ) ] }, 'VAR' ), bless( { 'children' => [ bless( { 'children' => [ bless( { 'children' => [], 'attr' => '2', 'token' => 'NUM' }, 'TERMINAL' ) ] }, 'NUM' ), bless( { 'children' => [ bless( { 'children' => [], 'attr' => '2', 'token' => 'NUM' }, 'TERMINAL' ) ] }, 'NUM' ) ] }, 'MINUS' ) ] }, 'TIMES' ); ***** After ****** $VAR1 = bless( { 'children' => [ bless( { 'children' => [], 'attr' => 0, 'token' => 'NUM' }, 'TERMINAL' ) ] }, 'NUM' ); ---------------------------------------------------------- See also the section L for a more contrived example. =head1 SEE ALSO =over =item * L, =item * L =item * The pdf file in L =item * L (Spanish), =item * L, =item * L, =item * L, =item * yacc(1), =item * bison(1), =item * the classic book "Compilers: Principles, Techniques, and Tools" by Alfred V. Aho, Ravi Sethi and =item * Jeffrey D. Ullman (Addison-Wesley 1986) =item * L. =back =head1 AUTHOR Casiano Rodriguez-Leon =head1 LICENSE AND COPYRIGHT Copyright (C) 2006 by Casiano Rodriguez-Leon This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available.