%{ # pascal.eyp # # Pascal grammar in Eyapp format, based originally on BNF given # in "Standard Pascal -- User Reference Manual", by Doug Cooper. # This in turn is the BNF given by the ANSI and ISO Pascal standards, # and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal. # The grammar has been massaged somewhat to make it LALR, and added # the following extensions. # # constant expressions # otherwise statement in a case # productions to correctly match else's with if's # beginnings of a separate compilation facility # %} %strict %token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ %token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION %token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT %token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC %token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN %token TO TYPE UNTIL UPARROW VAR WHILE WITH %tree %namingscheme { #Receives a Parse::Eyapp object describing the grammar my $self = shift; #$self->tokennames(); # returns the handler that will give names # to the right hand sides \&give_rhs_name; } %% file : program | module ; program : program_heading semicolon block DOT ; program_heading : PROGRAM identifier | PROGRAM identifier LPAREN identifier_list RPAREN ; identifier_list : identifier_list comma identifier | identifier ; block : label_declaration_part constant_definition_part type_definition_part variable_declaration_part procedure_and_function_declaration_part statement_part ; module : constant_definition_part type_definition_part variable_declaration_part procedure_and_function_declaration_part ; label_declaration_part : LABEL label_list semicolon | ; label_list : label_list comma label | label ; label : DIGSEQ ; constant_definition_part : CONST constant_list | ; constant_list : constant_list constant_definition | constant_definition ; constant_definition : identifier EQUAL cexpression semicolon ; /*constant : cexpression ; good stuff! */ cexpression : csimple_expression | csimple_expression relop csimple_expression ; csimple_expression : cterm | csimple_expression addop cterm ; cterm : cfactor | cterm mulop cfactor ; cfactor : sign cfactor | cexponentiation ; cexponentiation : cprimary | cprimary STARSTAR cexponentiation ; cprimary : identifier | LPAREN cexpression RPAREN | unsigned_constant | NOT cprimary ; constant : non_string | sign non_string | CHARACTER_STRING ; sign : PLUS | MINUS ; non_string : DIGSEQ | identifier | REALNUMBER ; type_definition_part : TYPE type_definition_list | ; type_definition_list : type_definition_list type_definition | type_definition ; type_definition : identifier EQUAL type_denoter semicolon ; type_denoter : identifier | new_type ; new_type : new_ordinal_type | new_structured_type | new_pointer_type ; new_ordinal_type : enumerated_type | subrange_type ; enumerated_type : LPAREN identifier_list RPAREN ; subrange_type : constant DOTDOT constant ; new_structured_type : structured_type | PACKED structured_type ; structured_type : array_type | record_type | set_type | file_type ; array_type : ARRAY LBRAC index_list RBRAC OF component_type ; index_list : index_list comma index_type | index_type ; index_type : ordinal_type ; ordinal_type : new_ordinal_type | identifier ; component_type : type_denoter ; record_type : RECORD record_section_list END | RECORD record_section_list semicolon variant_part END | RECORD variant_part END ; record_section_list : record_section_list semicolon record_section | record_section ; record_section : identifier_list COLON type_denoter ; variant_part : CASE variant_selector OF variant_list semicolon | CASE variant_selector OF variant_list | ; variant_selector : tag_field COLON tag_type | tag_type ; variant_list : variant_list semicolon variant | variant ; variant : case_constant_list COLON LPAREN record_section_list RPAREN | case_constant_list COLON LPAREN record_section_list semicolon variant_part RPAREN | case_constant_list COLON LPAREN variant_part RPAREN ; case_constant_list : case_constant_list comma case_constant | case_constant ; case_constant : constant | constant DOTDOT constant ; tag_field : identifier ; tag_type : identifier ; set_type : SET OF base_type ; base_type : ordinal_type ; file_type : PFILE OF component_type ; new_pointer_type : UPARROW domain_type ; domain_type : identifier ; variable_declaration_part : VAR variable_declaration_list semicolon | ; variable_declaration_list : variable_declaration_list semicolon variable_declaration | variable_declaration ; variable_declaration : identifier_list COLON type_denoter ; procedure_and_function_declaration_part : proc_or_func_declaration_list semicolon | ; proc_or_func_declaration_list : proc_or_func_declaration_list semicolon proc_or_func_declaration | proc_or_func_declaration ; proc_or_func_declaration : procedure_declaration | function_declaration ; procedure_declaration : procedure_heading semicolon directive | procedure_heading semicolon procedure_block ; procedure_heading : procedure_identification | procedure_identification formal_parameter_list ; directive : FORWARD | EXTERNAL ; formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ; formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section | formal_parameter_section ; formal_parameter_section : value_parameter_specification | variable_parameter_specification | procedural_parameter_specification | functional_parameter_specification ; value_parameter_specification : identifier_list COLON identifier ; variable_parameter_specification : VAR identifier_list COLON identifier ; procedural_parameter_specification : procedure_heading ; functional_parameter_specification : function_heading ; procedure_identification : PROCEDURE identifier ; procedure_block : block ; function_declaration : function_heading semicolon directive | function_identification semicolon function_block | function_heading semicolon function_block ; function_heading : FUNCTION identifier COLON result_type | FUNCTION identifier formal_parameter_list COLON result_type ; result_type : identifier ; function_identification : FUNCTION identifier ; function_block : block ; statement_part : compound_statement ; compound_statement : PBEGIN statement_sequence END ; statement_sequence : statement_sequence semicolon statement | statement ; statement : open_statement | closed_statement ; open_statement : label COLON non_labeled_open_statement | non_labeled_open_statement ; closed_statement : label COLON non_labeled_closed_statement | non_labeled_closed_statement ; non_labeled_closed_statement : assignment_statement | procedure_statement | goto_statement | compound_statement | case_statement | repeat_statement | closed_with_statement | closed_if_statement | closed_while_statement | closed_for_statement | ; non_labeled_open_statement : open_with_statement | open_if_statement | open_while_statement | open_for_statement ; repeat_statement : REPEAT statement_sequence UNTIL boolean_expression ; open_while_statement : WHILE boolean_expression DO open_statement ; closed_while_statement : WHILE boolean_expression DO closed_statement ; open_for_statement : FOR control_variable ASSIGNMENT initial_value direction final_value DO open_statement ; closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction final_value DO closed_statement ; open_with_statement : WITH record_variable_list DO open_statement ; closed_with_statement : WITH record_variable_list DO closed_statement ; open_if_statement : IF boolean_expression THEN statement | IF boolean_expression THEN closed_statement ELSE open_statement ; closed_if_statement : IF boolean_expression THEN closed_statement ELSE closed_statement ; assignment_statement : variable_access ASSIGNMENT expression ; variable_access : identifier | indexed_variable | field_designator | variable_access UPARROW ; indexed_variable : variable_access LBRAC index_expression_list RBRAC ; index_expression_list : index_expression_list comma index_expression | index_expression ; index_expression : expression ; field_designator : variable_access DOT identifier ; procedure_statement : identifier params | identifier ; params : LPAREN actual_parameter_list RPAREN ; actual_parameter_list : actual_parameter_list comma actual_parameter | actual_parameter ; #* this forces you to check all this to be sure that only write and #* writeln use the 2nd and 3rd forms, you really can't do it easily in #* the grammar, especially since write and writeln aren't reserved actual_parameter : expression | expression COLON expression | expression COLON expression COLON expression ; goto_statement : GOTO label ; case_statement : CASE case_index OF case_list_element_list END | CASE case_index OF case_list_element_list SEMICOLON END | CASE case_index OF case_list_element_list semicolon otherwisepart statement END | CASE case_index OF case_list_element_list semicolon otherwisepart statement SEMICOLON END ; case_index : expression ; case_list_element_list : case_list_element_list semicolon case_list_element | case_list_element ; case_list_element : case_constant_list COLON statement ; otherwisepart : OTHERWISE | OTHERWISE COLON ; control_variable : identifier ; initial_value : expression ; direction : TO | DOWNTO ; final_value : expression ; record_variable_list : record_variable_list comma variable_access | variable_access ; boolean_expression : expression ; expression : simple_expression | simple_expression relop simple_expression ; simple_expression : term | simple_expression addop term ; term : factor | term mulop factor ; factor : sign factor | exponentiation ; exponentiation : primary | primary STARSTAR exponentiation ; primary : variable_access | unsigned_constant | function_designator | set_constructor | LPAREN expression RPAREN | NOT primary ; unsigned_constant : unsigned_number | CHARACTER_STRING | NIL ; unsigned_number : unsigned_integer | unsigned_real ; unsigned_integer : DIGSEQ ; unsigned_real : REALNUMBER ; /* functions with no params will be handled by plain identifier */ function_designator : identifier params ; set_constructor : LBRAC member_designator_list RBRAC | LBRAC RBRAC ; member_designator_list : member_designator_list comma member_designator | member_designator ; member_designator : member_designator DOTDOT expression | expression ; addop: PLUS | MINUS | OR ; mulop : STAR | SLASH | DIV | MOD | AND ; relop : EQUAL | NOTEQUAL | LT | GT | LE | GE | IN ; identifier : IDENTIFIER ; semicolon : SEMICOLON ; comma : COMMA ; %% use Carp; use Getopt::Long; my %keywords = ( AND => 'AND', ARRAY => 'ARRAY', CASE => 'CASE', CONST => 'CONST', DIV => 'DIV', DO => 'DO', DOWNTO => 'DOWNTO', ELSE => 'ELSE', END => 'END', EXTERN => 'EXTERNAL', EXTERNAL => 'EXTERNAL', FOR => 'FOR', FORWARD => 'FORWARD', FUNCTION => 'FUNCTION', GOTO => 'GOTO', IF => 'IF', IN => 'IN', LABEL => 'LABEL', MOD => 'MOD', NIL => 'NIL', NOT => 'NOT', OF => 'OF', OR => 'OR', OTHERWISE => 'OTHERWISE', PACKED => 'PACKED', BEGIN => 'PBEGIN', FILE => 'PFILE', PROCEDURE => 'PROCEDURE', PROGRAM => 'PROGRAM', RECORD => 'RECORD', REPEAT => 'REPEAT', SET => 'SET', THEN => 'THEN', TO => 'TO', TYPE => 'TYPE', UNTIL => 'UNTIL', VAR => 'VAR', WHILE => 'WHILE', WITH => 'WITH', ); my %lexeme = ( ':=' => 'ASSIGNMENT', ':' => 'COLON', ',' => 'COMMA', '.' => 'DOT', '..' => 'DOTDOT', '=' => 'EQUAL', '>=' => 'GE', '>' => 'GT', '[' => 'LBRAC', '<=' => 'LE', '(' => 'LPAREN', '<' => 'LT', '-' => 'MINUS', '<>' => 'NOTEQUAL', '+' => 'PLUS', ']' => 'RBRAC', ')' => 'RPAREN', ';' => 'SEMICOLON', '/' => 'SLASH', '*' => 'STAR', '**' => 'STARSTAR', '->' => 'UPARROW', '^' => 'UPARROW', ); my ($tokenbegin, $tokenend) = (1, 1); sub _Lexer { my($parser)=shift; my $token; for ($parser->{INPUT}) { return('',undef) if !defined($_) or $_ eq ''; #Skip blanks and comments s{\A ((?: \s+ # any white space char | \(\*.*?\*\) # (*.. *) comments | \{.*?\} # { .. } comments )+ ) } {}xs and do { my($blanks)=$1; #Maybe At EOF return('', undef) if $_ eq ''; $tokenend += $blanks =~ tr/\n//; }; $tokenbegin = $tokenend; s{^([0-9]+(\.[0-9]+)?)}{} and do { return ('DIGSEQ', [$1, $tokenbegin]) unless defined($2); return ('REALNUMBER', [$1, $tokenbegin]); }; s{^(\'(\\.|[^\\'])*\')}{} and return('CHARACTER_STRING', [$1, $tokenbegin]); s/^([a-zA-Z_][A-Za-z0-9_]*)// and do { my $word = uc($1); my $r; return ($r, [$r, $tokenbegin]) if defined($r = $keywords{$word}); return('IDENTIFIER',[$word, $tokenbegin]); }; m/^(\S\S)/ and defined($token = $1) and exists($lexeme{$token}) and do { s/..//; return ($lexeme{$token}, [$token, $tokenbegin]); }; # do m/^(\S)/ and defined($token = $1) and exists($lexeme{$token}) and do { s/.//; return ($lexeme{$token}, [$token, $tokenbegin]); }; # do die "Unexpected character at $tokenbegin\n"; } # for } sub _Error { my($token)=$_[0]->YYCurval; my($what)= $token ? "input: '$token->[0]' in line $token->[1]" : "end of input"; my @expected = $_[0]->YYExpect(); my $expected = @expected? "Expected one of these tokens: '@expected'":""; croak "Syntax error near $what. $expected\n"; } sub Run { my($self)=shift; my $yydebug = shift || 0; return $self->YYParse( yylex => \&_Lexer, yyerror => \&_Error, yydebug => $yydebug, # 0x1F ); } sub uploadfile { my $file = shift; my $msg = shift; my $input = ''; eval { $input = Parse::Eyapp::Base::slurp_file($file) }; if ($@) { print $msg; local $/ = undef; $input = ; } return $input; } sub main { my $package = shift; my $debug = 0; my $file = ''; my $result = GetOptions ( "debug!" => \$debug, "file=s" => \$file, ); $debug = 0x1F if $debug; $file = shift if !$file && @ARGV; my $parser = $package->new(); my $prompt = "Expressions. Press CTRL-D (Unix) or CTRL-Z (Windows) to finish:\n"; $parser->{INPUT} = uploadfile($file, $prompt); $Parse::Eyapp::Node::INDENT = 2; print $parser->Run( $debug )->str,"\n"; } sub TERMINAL::info { $_[0]->{attr}[0] }; __PACKAGE__->main unless caller(); =head1 NAME Pascal eyapp grammar =head1 LIMITATIONS A parameter declarations must be followed by an identifer. A declaration like: procedure one (i, j : integer; k : array [1..5] of real); instead we can do: type arrreal5 = array [1..5] of real; procedure one (i, j : integer; k : arrreal5); =cut