#!/usr/bin/perl -w use strict; use Data::Dumper; use Parse::Eyapp; use Parse::Eyapp::Treeregexp; use Test::More; if( $] <= 5.007) { plan skip_all => 'Old Perl'; } else { plan tests => 9; } my $grammar = q{ /* Scope Analysis TODO: Attempt to implement DAGS to represent types */ %{ use strict; use Data::Dumper; use Test::More; use List::Util qw(reduce); use Parse::Eyapp::Base qw(firstval lastval); my %reserved = ( int => "INT", char => "CHAR", if => "IF", else => "ELSE", break => "BREAK", continue => "CONTINUE", return => "RETURN", while => "WHILE" ); my %lexeme = ( '=' => "ASSIGN", '+' => "PLUS", '-' => "MINUS", '*' => "TIMES", '/' => "DIV", '%' => "MOD", '|' => "OR", '&' => "AND", '{' => "LEFTKEY", '}' => "RIGHTKEY", ',' => "COMMA", ';' => "SEMICOLON", '(' => "LEFTPARENTHESIS", ')' => "RIGHTPARENTHESIS", '[' => "LEFTBRACKET", ']' => "RIGHTBRACKET", '==' => "EQUAL", '+=' => "PLUSEQUAL", '-=' => "MINUSEQUAL", '*=' => "TIMESEQUAL", '/=' => "DIVEQUAL", '%=' => "MODEQUAL", '!=' => "NOTEQUAL", '<' => "LESS", '>' => "GREATER", '<=' => "LESSEQUAL", '>=' => "GREATEREQUAL", '++' => "INC", '--' => "DEC", '**' => "EXP" ); sub is_duplicated { my ($st1, $st2) = @_; my $id; defined($id=firstval{exists $st1->{$_}} keys %$st2) and return "Error. Variable $id at line $st2->{$id}->{line} declared twice.\n"; return 0; } sub build_type { my $bt = shift; my @arrayspec = shift()->children(); my $type = ''; for my $s (@arrayspec) { $type .= "A_$s->{attr}[0]("; } if ($type) { $type = "$type$bt".(")"x@arrayspec); } else { $type = $bt; } return $type; } my ($tokenbegin, $tokenend); my %type = ( INT => 1, CHAR => 1, ); my %st; # Global symbol table my $depth = 0; my @pending_blocks; sub build_function_scope { my ($funcDef, $returntype) = @_; my $function_name = $funcDef->{function_name}[0]; my @parameters = @{$funcDef->{parameters}}; my $lst = $funcDef->{symboltable}; my $numargs = scalar(@parameters); #compute type my $partype = ""; if (@parameters) { $partype .= reduce { "$lst->{$a}{type},$lst->{$b}{type}" } @parameters; } my $type = "F(X_$numargs($partype),$returntype)"; #insert it in the hash of types $type{$type} = 1; #insert it in the global symbol table die "Duplicated declaration of $function_name at line $funcDef-->{attr}[1]\n" if exists($st{$function_name}); $st{$function_name}->{type} = $type; $st{$function_name}->{line} = $funcDef->{function_name}[1]; return $funcDef; } %} %syntactic token '=' '+=' '-=' '*=' '/=' '%=' '(' '[' %syntactic token '|' '&' '==' '!=' '<' '>' '>=' '<=' %syntactic token '+' '-' '*' %syntactic token '/' '%' '**' '++' '--' 'ELSE' %syntactic token RETURN BREAK CONTINUE %nonassoc WEAK %nonassoc '(' '[' %right '=' '+=' '-=' '*=' '/=' '%=' %left '|' %left '&' %left '==' '!=' %left '<' '>' '>=' '<=' %left '+' '-' %left '*' '/' '%' %right '**' %right '++' '--' %right 'ELSE' %tree %% program: { $tokenbegin = $tokenend = 1 } definition<%name PROGRAM +>.program { $program->{symboltable} = { %st }; # creates a copy of the s.t. for (keys %type) { $type{$_} = Parse::Eyapp::Node->new($_); } $program->{depth} = 0; $program->{line} = 1; $program->{types} = { %type }; $program->{lines} = $tokenend; # Reset file scope variables %st = (); # reset symbol table ($tokenbegin, $tokenend) = (1, 1); %type = (INT => "INT", CHAR => "CHAR"); $program; } ; definition: $funcDef { build_function_scope($funcDef, 'INT'); } | %name FUNCTION $basictype $funcDef { build_function_scope($funcDef, $basictype->type); } | declaration { #control duplicated declarations my $message; die $message if $message = is_duplicated(\%st, $_[1]); %st = (%st, %{$_[1]}); # improve this code return undef; # will not be inserted in the AST } ; basictype: %name INT INT | %name CHAR CHAR ; funcDef: $ID '(' $params ')' $block { my $st = $block->{symboltable}; my @decs = $params->children(); $block->{parameters} = []; while (my ($bt, $id, $arrspec) = splice(@decs, 0, 3)) { my $bt = ref($bt); # The string 'INT', 'CHAR', etc. my $name = $id->{attr}[0]; my $type = build_type($bt, $arrspec); $type{$type} = 1; # has too much $type for me! # control duplicated declarations die "Duplicated declaration of $name at line $id->{attr}[1]\n" if exists($st->{$name}); $st->{$name}->{type} = $type; $st->{$name}->{param} = 1; $st->{$name}->{line} = $id->{attr}[1]; push @{$block->{parameters}}, $name; } $block->{function_name} = $ID; $block->type("FUNCTION"); return $block; } ; params: ( basictype ID arraySpec)<%name PARAMS * ','> { $_[1] } ; block: '{'.bracket { $depth++ } /* intermediate action! */ declaration<%name DECLARATIONS *>.decs statement<%name STATEMENTS *>.sts '}' { my %st; for my $lst ($decs->children) { # control duplicated declarations my $message; die $message if $message = is_duplicated(\%st, $lst); %st = (%st, %$lst); } $sts->{symboltable} = \%st; $sts->{line} = $bracket->[1]; $sts->{depth} = $depth--; $sts->type("BLOCK"); push @pending_blocks, $sts; return $sts; } ; declaration: %name DECLARATION $basictype $declList ';' { my %st; # Symbol table local to this declaration my $bt = $basictype->type; my @decs = $declList->children(); while (my ($id, $arrspec) = splice(@decs, 0, 2)) { my $name = $id->{attr}[0]; my $type = build_type($bt, $arrspec); $type{$type} = 1; # has too much $type for me! # control duplicated declarations die "Duplicated declaration of $name at line $id->{attr}[1]\n" if exists($st{$name}); $st{$name}->{type} = $type; $st{$name}->{line} = $id->{attr}[1]; } return \%st; } ; declList: (ID arraySpec) <%name VARLIST + ','> { $_[1] } ; arraySpec: ( '[' INUM ']')* { $_[1]->type("ARRAYSPEC"); $_[1] } ; statement: expression ';' { $_[1] } | ';' | %name BREAK BREAK ';' | %name CONTINUE CONTINUE ';' | %name EMPTYRETURN RETURN ';' | %name RETURN RETURN expression ';' | block { $_[1] } | %name IF ifPrefix statement %prec '+' | %name IFELSE ifPrefix statement 'ELSE' statement | %name WHILE loopPrefix statement ; ifPrefix: IF '(' expression ')' { $_[3] } ; loopPrefix: WHILE '(' expression ')' { $_[3] } ; expression: binary <+ ','> { return $_[1]->child(0) if ($_[1]->children() == 1); return $_[1]; } ; Variable: %name VAR ID %prec WEAK | %name VARARRAY ID ('[' binary ']') <%name INDEXSPEC +> ; Primary: %name INUM INUM | Variable %prec WEAK { $_[1] } | '(' expression ')' { $_[2] } | %name FUNCTIONCALL ID '(' binary <%name ARGLIST * ','> ')' ; Unary: '++' Variable | '--' Variable | Primary { $_[1] } ; binary: Unary { $_[1] } | %name PLUS binary '+' binary | %name MINUS binary '-' binary | binary '*' binary | binary '/' binary | binary '%' binary | %name LT binary '<' binary | %name GT binary '>' binary | binary '>=' binary | binary '<=' binary | binary '==' binary | binary '!=' binary | binary '&' binary | binary '**' binary | binary '|' binary | %name ASSIGN Variable '=' binary | Variable '+=' binary | Variable '-=' binary | Variable '*=' binary | Variable '/=' binary | Variable '%=' binary ; %% sub _Error { my($token)=$_[0]->YYCurval; my($what)= $token ? "input: '$token->[0]'" : "end of input"; my @expected = $_[0]->YYExpect(); die "Syntax error near $what in line $token->[1]. Expected one of these tokens: @expected\n"; } sub _Lexer { my($parser)=shift; for ($parser->YYData->{INPUT}) { return('',undef) if !defined($_) or $_ eq ''; #Skip blanks s{\A ((?: \s+ # any white space char | /\*.*?\*/ # C like comments )+ ) } {}xs and do { my($blanks)=$1; #Maybe At EOF return('', undef) if $_ eq ''; $tokenend += $blanks =~ tr/\n//; }; $tokenbegin = $tokenend; s/^([0-9]+(?:\.[0-9]+)?)// and return('INUM',[$1, $tokenbegin]); s/^([A-Za-z][A-Za-z0-9_]*)// and do { my $word = $1; my $r; return ($r, [$r, $tokenbegin]) if defined($r = $reserved{$word}); return('ID',[$word, $tokenbegin]); }; s/^(\S)// and do { my $token1 = $1; m{^(\S)}; my $token2 = $2; my $ltoken = defined($token2)?"$token1$token2":$token1; if (exists($lexeme{$ltoken})) { s/^.// if length($ltoken) > 1; return ($ltoken, [$ltoken, $tokenbegin]); } die "Error. Unexpected token $ltoken\n"; }; # do } # for } sub Parse::Eyapp::Node::build_blocks_tree { my $t = shift; # tree my (@b, @blocks); @b = @blocks = $SimpleTrans::blocks->m($t); while (@blocks) { my $b = pop @blocks; my $d = $b->{depth}; my $f = lastval { $_->{depth} < $d} @blocks; last unless $f; $b->{fatherblock} = $f; #print "depth=$b->{depth}, node=$b, father= $b->{fatherblock}\n"; } wantarray? @b : $t; } sub Parse::Eyapp::Node::build_blocks_tree2 { my $t = shift; # tree my @b = $SimpleTrans::blocks->m($t); for (@b) { my ($n, $d, $f, $ch) = @$_; if (defined($f)) { $n->{fatherblock} = $f->[0]; # print "depth=$n->{depth}, node=$n, father= $n->{fatherblock}\n"; } else { # print "depth=$n->{depth}, node=$n, father= nofather\n"; } } wantarray? @b : $t; } sub Parse::Eyapp::Node::build_blocks_tree3 { my $t = shift; # tree my @b = $SimpleTrans::blocks->m($t); $_->[0]->{fatherblock} = $_->[2][0] for (@b); return @b; } sub Parse::Eyapp::Node::build_blocks_tree_with_subtree { my $t = shift; # tree my @b = $SimpleTrans::blocks->m($t); $_->{node}{fatherblock} = $_->{father}{node} for (@b); return @b; } my @tests = ( #Correct program << "EOICORRECT", f() { int a,b[1][2],c[1][2][3]; char d[10]; b[0][1] = a; } EOICORRECT # << "EOI_TWICE", #/* Duplicated declaration of a at line 2 */ #f() { # int a,b[1][2],a[1][2][3]; # char d[10]; # b[0][1] = a; #} #EOI_TWICE # << "EOI_TWICE_DIF_DEC", #/* Duplicated declaration of a at line 3 */ #f() { # int a,b[1][2],c[1][2][3]; # char d[10], b[9]; # b[0] = a; #} #EOI_TWICE_DIF_DEC # # Correct program. Global and local decs << "EOI_GLOBAL_DEC", int a,b[1][2],c[1][2][3]; char d,e[1][2]; f() { int a[1],b[1][2],c[1][2][3]; char d[10], e[9]; b[0] = a[1]; } EOI_GLOBAL_DEC ## << "EOI_GLOBAL_DUP", ##/* Error: duplicated global dec */ ##int a,b[1][2],c[1][2][3]; ##char d,a[1][2]; ##f() { ## int a,b[1][2],c[1][2][3]; ## char d[10], e[9]; ## ## b[0][1] = a; ##} ##EOI_GLOBAL_DUP # Correct program. Parameters << "EOI_GLOBAL_PAR", int a,b[1][2],c[1][2][3]; char d,e[1][2]; f(int a, char b[10]) { int c[1][2][3]; char d[10], e[9]; b[0][1] = a; d[5] = e[4]; } EOI_GLOBAL_PAR # Correct program. Only global << "EOI_GLOBAL", int a,b[1][2],c[1][2][3]; EOI_GLOBAL # Correct program. Return char and Parameters << "EOI_RETURN", int a,b[1][2],c[1][2][3]; char d,e[1][2]; char f(int a, char b[10]) { int c[1][2]; char d[10], e[9]; return b[0]; } EOI_RETURN ## Correct program. No parameters << "EOI_RETURN_NOPAR", char d,e[1][2]; char f() { int c[2]; char d; return d; } EOI_RETURN_NOPAR # << "EOIPARAMDECLTWICE", #int a, b[1][2]; #char d, e[1][2]; #char f(int a, char b[10]) { # int c[1][2]; # char b[10], e[9]; # # return b[0]; #} #EOIPARAMDECLTWICE # Correct program. No parameters << "EOI_NESTED_BLOCKS", char d0; char f() { char d1; { char d2; } { char d2; { char d3; d3; } } { d0; } return d1; } EOI_NESTED_BLOCKS # Correct program. No parameters << "EOI_NESTED_BLOCKS2", char d0; char f() { { {} } { { } } { {{}} } } EOI_NESTED_BLOCKS2 << "EOI_NESTED_BLOCKS3", char d0; char f() { { {} } { { } } { {{}} } } g() { {} { {} } {} } EOI_NESTED_BLOCKS3 ); # end of @tests my @expected_tree = ( 'PROGRAM(FUNCTION[f](ASSIGN(VARARRAY(TERMINAL[b:4],INDEXSPEC(INUM(TERMINAL[0:4]),INUM(TERMINAL[1:4]))),VAR(TERMINAL[a:4]))))', 'PROGRAM(FUNCTION[f](ASSIGN(VARARRAY(TERMINAL[b:7],INDEXSPEC(INUM(TERMINAL[0:7]))),VARARRAY(TERMINAL[a:7],INDEXSPEC(INUM(TERMINAL[1:7]))))))', 'PROGRAM(FUNCTION[f](ASSIGN(VARARRAY(TERMINAL[b:7],INDEXSPEC(INUM(TERMINAL[0:7]),INUM(TERMINAL[1:7]))),VAR(TERMINAL[a:7])),ASSIGN(VARARRAY(TERMINAL[d:8],INDEXSPEC(INUM(TERMINAL[5:8]))),VARARRAY(TERMINAL[e:8],INDEXSPEC(INUM(TERMINAL[4:8]))))))', 'PROGRAM', 'PROGRAM(FUNCTION[f](RETURN(VARARRAY(TERMINAL[b:7],INDEXSPEC(INUM(TERMINAL[0:7]))))))', 'PROGRAM(FUNCTION[f](RETURN(VAR(TERMINAL[d:6]))))', 'PROGRAM(FUNCTION[f](BLOCK[4],BLOCK[7](BLOCK[9](VAR(TERMINAL[d3:12]))),BLOCK[15](VAR(TERMINAL[d0:16])),RETURN(VAR(TERMINAL[d1:19]))))', 'PROGRAM(FUNCTION[f](BLOCK[3](BLOCK[4]),BLOCK[6](BLOCK[7]),BLOCK[9](BLOCK[10](BLOCK[10]))))', 'PROGRAM(FUNCTION[f](BLOCK[3](BLOCK[4]),BLOCK[6](BLOCK[7]),BLOCK[9](BLOCK[10](BLOCK[10]))),FUNCTION[g](BLOCK[14],BLOCK[15](BLOCK[16]),BLOCK[18]))', ); my @expected_error = ( qr{Duplicated declaration of a at line}, qr{Error. Variable b at line 4 declared twice}, ); sub Run { my($self)=shift; my ($forest, $t); my ($k, $e) = (0, 0); for (@tests) { $self->YYData->{INPUT} = $_; # print "****************\n$_"; eval { $t = $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, #yydebug => 0x1F ); }; if ($@) { # print "\n$@"; like($@, $expected_error[$e++],"Simple4 error $e"); } else { # print $t->str."\n"; is($t->str, $expected_tree[$k++], "Simple scope tree $k"); my @blocks = $SimpleTrans::blocks->m($t); $_->node->{fatherblock} = $_->father->{node} for (@blocks[1..$#blocks]); $Data::Dumper::Deepcopy = 1; #print Dumper $t; # print $_->str."\n" for @blocks; push @$forest, $t; } } return $forest; } sub TERMINAL::info { my @a = join ':', @{$_[0]->{attr}}; return "@a" } sub FUNCTION::info { return $_[0]->{function_name}[0] } sub BLOCK::info { return $_[0]->{line} } }; ######### main ############## $Data::Dumper::Indent = 1; Parse::Eyapp::Treeregexp->new( STRING => q{ blocks: /BLOCK|FUNCTION|PROGRAM/ }, PACKAGE => 'SimpleTrans' )->generate(); # Syntax analysis Parse::Eyapp->new_grammar( input=>$grammar, classname=>'Rule6', #outputfile => 'match.pm', firstline=>9, ); my $parser = Rule6->new(); my $t = $parser->Run;