/*
 * PIR.eyp
 *
 * Intermediate Code Compiler for Parrot.
 *
 * Copyright (C) 2002 Melvin Smith
 * Copyright (C) 2002-2009, Parrot Foundation.
 *
 * Grammar of the PIR language parser.
 * Casiano: cleaned semantic actions to study the grammar with Eyapp. 2009
 */

%strict

/* We need precedence for a few tokens to resolve a couple of conflicts */
/* Commented tokens look as if they weren't used */

%nonassoc LOW_PREC
%nonassoc '\n'
%nonassoc PARAM

%token HLL TK_LINE TK_FILE
%token GOTO ARG IF UNLESS PNULL SET_RETURN SET_YIELD
%token ADV_FLAT ADV_SLURPY ADV_OPTIONAL ADV_OPT_FLAG ADV_NAMED ADV_ARROW
#%token NEW
#%token ADV_INVOCANT
%token ADV_CALL_SIG
%token NAMESPACE
%token SUB LOCAL LEXICAL CONST ANNOTATE
#%token INC SYM DOT_METHOD DEC
%token GLOBAL_CONST
%token PLUS_ASSIGN MINUS_ASSIGN MUL_ASSIGN DIV_ASSIGN CONCAT_ASSIGN
%token BAND_ASSIGN BOR_ASSIGN BXOR_ASSIGN FDIV FDIV_ASSIGN MOD_ASSIGN
%token SHR_ASSIGN SHL_ASSIGN SHR_U_ASSIGN
%token SHIFT_LEFT SHIFT_RIGHT INTV FLOATV STRINGV PMCV LOG_XOR
%token RELOP_EQ RELOP_NE RELOP_GT RELOP_GTE RELOP_LT RELOP_LTE
%token RESULT RETURN TAILCALL YIELDT GET_RESULTS
%token POW SHIFT_RIGHT_U LOG_AND LOG_OR
%token COMMA ESUB
#%token DOTDOT ENDM
%token PCC_BEGIN PCC_END PCC_CALL PCC_SUB PCC_BEGIN_RETURN PCC_END_RETURN
%token PCC_BEGIN_YIELD PCC_END_YIELD NCI_CALL METH_CALL INVOCANT
%token MAIN LOAD INIT IMMEDIATE POSTCOMP METHOD ANON OUTER NEED_LEX
%token MULTI VTABLE_METHOD LOADLIB SUB_INSTANCE_OF SUBID
%token NS_ENTRY
%token UNIQUE_REG
%token LABEL
%token EMIT EOM
%token IREG NREG SREG PREG IDENTIFIER REG MACRO
%token STRINGC INTC FLOATC USTRINGC
%token PARROT_OP
%type type hll_def return_or_yield comma_or_goto opt_unique_reg
%type program
%type class_namespace
%type constdef sub emit pcc_ret pcc_yield
%type compilation_units compilation_unit pmc_const pragma
%type classname relop any_string assign_op bin_op un_op
%type labels _labels label statement sub_call
%type pcc_sub_call
%type sub_param sub_params pcc_arg pcc_result pcc_args pcc_results sub_param_type_def
%type pcc_returns pcc_yields pcc_return pcc_call arg arglist the_sub multi_type
%type argtype_list argtype paramtype_list paramtype
%type pcc_return_many
%type proto sub_proto sub_proto_list multi multi_types outer
%type vtable instanceof subid
%type method ns_entry_name
%type instruction assignment conditional_statement labeled_inst opt_label op_assign
%type if_statement unless_statement
%type func_assign get_results
%type opt_invocant
%type annotate_directive
%type target targetlist reg const var result pcc_set_yield
%type keylist keylist_force _keylist key maybe_ns
%type vars _vars var_or_i _var_or_i label_op sub_label_op sub_label_op_c
%type pasmcode pasmline pasm_inst
%type pasm_args
%type var_returns
%token VAR
%token LINECOMMENT
%token FILECOMMENT
%type id_list id_list_id

%nonassoc CONCAT DOT

%{
# Building blocks for lexical patterns. Each fragment is a compiled regex
# so that it can be interpolated into larger patterns without its
# alternations leaking into the surrounding expression.
# NOTE(review): nothing in the visible part of this file consumes these
# variables; presumably a lexer defined elsewhere does -- confirm.

my $LETTER           = qr{[a-zA-Z_\@]};
my $DIGIT            = qr{[0-9]};
my $DIGITS           = qr{(?:$DIGIT)+};
my $HEX              = qr{0[xX][0-9A-Fa-f]+};
my $OCT              = qr{0[oO][0-7]+};
my $BIN              = qr{0[bB][01]+};
my $DOT              = qr{[.]};
my $SIGN             = qr{[-+]};

# The suffix used to be written "L" (flex-style quoting, apparently carried
# over from a lex specification). Inside a Perl regex the double quotes are
# ordinary characters, so the pattern demanded 123"L" instead of 123L.
# ${DIGITS} delimits the variable name from the literal L that follows.
my $BIGINT           = qr{(?:$SIGN)?${DIGITS}L};

my $FLOATNUM         = qr{(?:$SIGN)?(?:(?:$DIGITS$DOT(?:$DIGIT)*|$DOT$DIGITS)(?:[eE](?:$SIGN)?$DIGITS)?|$DIGITS[eE](?:$SIGN)?$DIGITS)};
my $LETTERDIGIT      = qr{[a-zA-Z0-9_]};
my $LABELLETTERDIGIT = qr{(?:[a-zA-Z0-9_@])};
my $ID               = qr{$LETTER(?:$LABELLETTERDIGIT)*};
my $DQ_STRING        = qr{\"(?:\\.|[^"\\\n])*\"};

# Same quoting fix as $BIGINT: the hyphen and the colon are plain literals,
# not the three-character sequences "-" and ":".
my $ENCCHAR          = qr{$LETTER|$DIGIT|-};
my $ENCCHARS         = qr{(?:$ENCCHAR)*};
my $ENC              = qr{$LETTER${ENCCHARS}:};

# One or two "name:" prefixes followed by a double-quoted string.
my $UNICODE          = qr{$ENC(?:$ENC)?$DQ_STRING};
my $SQ_STRING        = qr{\'[^'\n]*\'};
my $STRINGCONSTANT   = qr{$SQ_STRING|$DQ_STRING};
my $RANKSPEC         = qr{\[[,]*\]};
my $EOL              = qr{\r?\n};
my $WS               = qr{[\t\f\r\x1a ]};
my $SP               = qr{[ ]};
%}

%start program

%tree

%%

/* A program is a non-empty sequence of compilation units. */
program:
      compilation_units
;

compilation_units:
      compilation_unit
    | compilation_units compilation_unit
;

compilation_unit:
      class_namespace
    | constdef
    | sub
    | emit
    | MACRO '\n'
    | pragma
    | location_directive
    | '\n'
;

pragma:
      hll_def '\n'
    | LOADLIB STRINGC '\n'
;

location_directive:
      TK_LINE INTC COMMA STRINGC '\n'
    | TK_FILE STRINGC '\n'
;

annotate_directive:
      ANNOTATE STRINGC COMMA const
;

hll_def:
      HLL STRINGC
;

constdef:
      CONST type IDENTIFIER '=' const
;

pmc_const:
      CONST INTC var_or_i '=' any_string
    | CONST STRINGC var_or_i '=' any_string
;

any_string:
      STRINGC
    | USTRINGC
;

/* PASM code, reachable only through the 'emit' rule below. */
pasmcode:
      pasmline
    | pasmcode pasmline
;

pasmline:
      labels pasm_inst '\n'
    | MACRO '\n'
    | FILECOMMENT
    | LINECOMMENT
    | class_namespace
    | pmc_const
    | pragma
;

pasm_inst:
      PARROT_OP pasm_args
    | PCC_SUB sub_proto LABEL
    | PNULL var
    | LEXICAL STRINGC COMMA REG
    | /* empty */
;

pasm_args:
      vars
;

emit:
      /* EMIT and EOM tokens are used when compiling a .pasm file. */
      EMIT opt_pasmcode EOM
;

opt_pasmcode:
      /* empty */
    | pasmcode
;

class_namespace:
      NAMESPACE maybe_ns '\n'
;

maybe_ns:
      '[' keylist ']'
    | '[' ']'
;

/* A subroutine: header line, parameter declarations, body, ESUB. */
sub:
      SUB sub_label_op_c sub_proto '\n' sub_params sub_body ESUB
;

/* The empty alternative carries LOW_PREC, the lowest of the precedence
 * levels declared in the head section (below '\n' and PARAM). */
sub_params:
      /* empty */ %prec LOW_PREC
    | '\n'
    | sub_params sub_param '\n'
;

sub_param:
      PARAM sub_param_type_def
;

sub_param_type_def:
      type IDENTIFIER paramtype_list
;

multi:
      MULTI '(' multi_types ')'
;

outer:
      OUTER '(' STRINGC ')'
    | OUTER '(' IDENTIFIER ')'
;

vtable:
      VTABLE_METHOD
    | VTABLE_METHOD '(' STRINGC ')'
;

method:
      METHOD
    | METHOD '(' any_string ')'
;

ns_entry_name:
      NS_ENTRY
    | NS_ENTRY '(' any_string ')'
;

instanceof:
      SUB_INSTANCE_OF '(' STRINGC ')'
;

subid:
      SUBID
    | SUBID '(' any_string ')'
;

multi_types:
      /* empty */
    | multi_types COMMA multi_type
    | multi_type
;

multi_type:
      INTV
    | FLOATV
    | PMCV
    | STRINGV
    | IDENTIFIER
    | STRINGC
    | '[' keylist ']'
;

sub_body:
      /* empty */
    | statements
;

/* Long-form call: PCC_BEGIN ... PCC_END with arguments, an optional
 * invocant, the call itself, an optional label and result declarations. */
pcc_sub_call:
      PCC_BEGIN '\n' pcc_args opt_invocant pcc_call opt_label pcc_results PCC_END
;

opt_label:
      /* empty */
    | label '\n'
;

opt_invocant:
      /* empty */
    | INVOCANT var '\n'
;

/* Optional list of subroutine flags following the subroutine name. */
sub_proto:
      /* empty */
    | sub_proto_list
;

sub_proto_list:
      proto
    | sub_proto_list proto
;

proto:
      LOAD
    | INIT
    | MAIN
    | IMMEDIATE
    | POSTCOMP
    | ANON
    | NEED_LEX
    | multi
    | outer
    | vtable
    | method
    | ns_entry_name
    | instanceof
    | subid
;
/* The call line inside a PCC_BEGIN ... PCC_END block. */
pcc_call:
      PCC_CALL var COMMA var '\n'
    | PCC_CALL var '\n'
    | NCI_CALL var '\n'
    | METH_CALL target '\n'
    | METH_CALL STRINGC '\n'
    | METH_CALL target COMMA var '\n'
    | METH_CALL STRINGC COMMA var '\n'
;

pcc_args:
      /* empty */
    | pcc_args pcc_arg '\n'
;

pcc_arg:
      ARG arg
;

pcc_results:
      /* empty */
    | pcc_results pcc_result '\n'
;

pcc_result:
      RESULT target paramtype_list
    | LOCAL type id_list_id
;

/* Zero or more parameter flags. */
paramtype_list:
      /* empty */
    | paramtype_list paramtype
;

paramtype:
      ADV_SLURPY
    | ADV_OPTIONAL
    | ADV_OPT_FLAG
    | ADV_NAMED
    | ADV_NAMED '(' STRINGC ')'
    | ADV_NAMED '(' USTRINGC ')'
    | UNIQUE_REG
    | ADV_CALL_SIG
;

/* Returning: either the bracketed multi-line form or the one-line form. */
pcc_ret:
      PCC_BEGIN_RETURN '\n' pcc_returns PCC_END_RETURN
    | pcc_return_many
;

pcc_yield:
      PCC_BEGIN_YIELD '\n' pcc_yields PCC_END_YIELD
;

pcc_returns:
      /* empty */
    | pcc_returns '\n'
    | pcc_returns pcc_return '\n'
;

pcc_yields:
      /* empty */
    | pcc_yields '\n'
    | pcc_yields pcc_set_yield '\n'
;

pcc_return:
      SET_RETURN var argtype_list
;

pcc_set_yield:
      SET_YIELD var argtype_list
;

/* One-line form shared by RETURN and YIELDT. */
pcc_return_many:
      return_or_yield '(' var_returns ')'
;

return_or_yield:
      RETURN
    | YIELDT
;

var_returns:
      /* empty */
    | arg
    | STRINGC ADV_ARROW var
    | var_returns COMMA arg
    | var_returns COMMA STRINGC ADV_ARROW var
;

statements:
      statement
    | statements statement
;

/*
 * Why helper_clear_state exists (inherited explanation, reworded):
 * 'instruction' can begin with PARAM, and in the 'pcc_sub' rule
 * 'pcc_params' is followed by 'statement'.  That produces a shift/reduce
 * conflict on PARAM: reduce to the dummy rule, or shift PARAM so it
 * becomes part of 'pcc_params' (the wanted outcome).  yacc does not
 * propagate precedence to dummy rules, so the dummy is written out as a
 * named empty rule purely so that %prec can be attached to it.
 * NOTE(review): 'pcc_sub' and 'pcc_params' are not rule names in this
 * file; they look like older names for 'sub' and 'sub_params' -- confirm.
 */
helper_clear_state:
      %prec LOW_PREC
;

statement:
      helper_clear_state instruction
    | MACRO '\n'
    | FILECOMMENT
    | LINECOMMENT
    | location_directive
    | annotate_directive
;

labels:
      /* none */
    | _labels
;

_labels:
      _labels label
    | label
;

label:
      LABEL
;

/* The second alternative uses yacc's reserved 'error' token followed
 * by a newline. */
instruction:
      labels labeled_inst '\n'
    | error '\n'
;

id_list:
      id_list_id
    | id_list COMMA id_list_id
;

id_list_id:
      IDENTIFIER opt_unique_reg
;

opt_unique_reg:
      /* empty */
    | UNIQUE_REG
;

/* Everything that may follow the (optional) labels on a line. */
labeled_inst:
      assignment
    | conditional_statement
    | LOCAL type id_list
    | LEXICAL STRINGC COMMA target
    | LEXICAL USTRINGC COMMA target
    | CONST type IDENTIFIER '=' const
    | pmc_const
    | GLOBAL_CONST type IDENTIFIER '=' const
    | TAILCALL sub_call
    | GOTO label_op
    | PARROT_OP vars
    | PNULL var
    | sub_call
    | pcc_sub_call
    | pcc_ret
    | pcc_yield
    | /* none */
;

type:
      INTV
    | FLOATV
    | STRINGV
    | PMCV
;

classname:
      IDENTIFIER
;

assignment:
      target '=' var
    | target '=' un_op var
    | target '=' var bin_op var
    | target '=' var '[' keylist ']'
    | target '[' keylist ']' '=' var
    | target '=' 'new' classname '[' keylist ']'
      /* short-form subroutine call */
    | target '=' sub_call
    | '(' targetlist ')' '=' the_sub '(' arglist ')'
    | get_results
    | op_assign
    | func_assign
    | target '=' PNULL
;

un_op:
      '!'
    | '~'
    | '-'
;

bin_op:
      '-'
    | '+'
    | '*'
    | '/'
    | '%'
    | FDIV
    | POW
    | CONCAT
    | RELOP_EQ
    | RELOP_NE
    | RELOP_GT
    | RELOP_GTE
    | RELOP_LT
    | RELOP_LTE
    | SHIFT_LEFT
    | SHIFT_RIGHT
    | SHIFT_RIGHT_U
    | LOG_AND
    | LOG_OR
    | LOG_XOR
    | '&'
    | '|'
    | '~'
;

get_results:
      GET_RESULTS '(' targetlist ')'
;

op_assign:
      target assign_op var
;

assign_op:
      PLUS_ASSIGN
    | MINUS_ASSIGN
    | MUL_ASSIGN
    | DIV_ASSIGN
    | MOD_ASSIGN
    | FDIV_ASSIGN
    | CONCAT_ASSIGN
    | BAND_ASSIGN
    | BOR_ASSIGN
    | BXOR_ASSIGN
    | SHR_ASSIGN
    | SHL_ASSIGN
    | SHR_U_ASSIGN
;

func_assign:
      target '=' PARROT_OP pasm_args
;

/* What may stand in callee position of a short-form call. */
the_sub:
      IDENTIFIER
    | STRINGC
    | USTRINGC
    | target
    | target DOT sub_label_op
    | target DOT USTRINGC
    | target DOT STRINGC
    | target DOT target
;

sub_call:
      the_sub '(' arglist ')'
;

arglist:
      /* empty */
    | arglist COMMA arg
    | arg
    | arglist COMMA STRINGC ADV_ARROW var
    | var ADV_ARROW var
    | STRINGC ADV_ARROW var
;

arg:
      var argtype_list
;

argtype_list:
      /* empty */
    | argtype_list argtype
;

argtype:
      ADV_FLAT
    | ADV_NAMED
    | ADV_CALL_SIG
    | ADV_NAMED '(' USTRINGC ')'
    | ADV_NAMED '(' STRINGC ')'
;

result:
      target paramtype_list
;

targetlist:
      targetlist COMMA result
    | targetlist COMMA STRINGC ADV_ARROW target
    | result
    | STRINGC ADV_ARROW target
    | /* empty */
;

conditional_statement:
      if_statement
    | unless_statement
;

unless_statement:
      UNLESS var relop var GOTO label_op
    | UNLESS PNULL var GOTO label_op
    | UNLESS var comma_or_goto label_op
;

if_statement:
      IF var comma_or_goto label_op
    | IF var relop var GOTO label_op
    | IF PNULL var GOTO label_op
;

comma_or_goto:
      COMMA
    | GOTO
;

relop:
      RELOP_EQ
    | RELOP_NE
    | RELOP_GT
    | RELOP_GTE
    | RELOP_LT
    | RELOP_LTE
;

target:
      VAR
    | reg
;

vars:
      /* empty */
    | _vars
;

_vars:
      _vars COMMA _var_or_i
    | _var_or_i
;

_var_or_i:
      var_or_i
    | target '[' keylist ']'
    | '[' keylist_force ']'
;

sub_label_op_c:
      sub_label_op
    | STRINGC
    | USTRINGC
;

sub_label_op:
      IDENTIFIER
    | PARROT_OP
;

label_op:
      IDENTIFIER
    | PARROT_OP
;

var_or_i:
      label_op
    | var
;

var:
      target
    | const
;

/* keylist and keylist_force share the same shape; both wrap _keylist. */
keylist:
      _keylist
;

keylist_force:
      _keylist
;

/* One or more keys separated by ';'. */
_keylist:
      key
    | _keylist ';' key
;

key:
      var
;

reg:
      IREG
    | NREG
    | SREG
    | PREG
    | REG
;

const:
      INTC
    | FLOATC
    | STRINGC
    | USTRINGC
;

/* End of the rules section */

%%

# Lookup tables from source spelling to token name.
# NOTE(review): the code that consults these tables is not part of the
# visible excerpt; presumably a lexer further down uses them -- confirm.

# Dot-prefixed directives.
my %directives = (
    '.set_arg'      => 'ARG',
    '.sub'          => 'SUB',
    '.end'          => 'ESUB',
    '.begin_call'   => 'PCC_BEGIN',
    '.end_call'     => 'PCC_END',
    '.call'         => 'PCC_CALL',
    '.nci_call'     => 'NCI_CALL',
    '.meth_call'    => 'METH_CALL',
    '.invocant'     => 'INVOCANT',
    '.begin_return' => 'PCC_BEGIN_RETURN',
    '.end_return'   => 'PCC_END_RETURN',
    '.begin_yield'  => 'PCC_BEGIN_YIELD',
    '.end_yield'    => 'PCC_END_YIELD',
    '.get_result'   => 'RESULT',
    '.get_results'  => 'GET_RESULTS',
    '.yield'        => 'YIELDT',
    '.set_yield'    => 'SET_YIELD',
    '.return'       => 'RETURN',
    '.set_return'   => 'SET_RETURN',
    '.tailcall'     => 'TAILCALL',
    '.local'        => 'LOCAL',
    '.globalconst'  => 'GLOBAL_CONST',
    '.param'        => 'PARAM',
);

# Colon-prefixed flags.
# NOTE(review): ':invocant' maps to ADV_INVOCANT, whose %token declaration
# is commented out in the head section and which no rule references.
my %colons = (
    ':unique_reg' => 'UNIQUE_REG',
    ':instanceof' => 'SUB_INSTANCE_OF',
    ':subid'      => 'SUBID',
    ':flat'       => 'ADV_FLAT',
    ':slurpy'     => 'ADV_SLURPY',
    ':optional'   => 'ADV_OPTIONAL',
    ':opt_flag'   => 'ADV_OPT_FLAG',
    ':named'      => 'ADV_NAMED',
    ':invocant'   => 'ADV_INVOCANT',
    ':call_sig'   => 'ADV_CALL_SIG',
);

# Bare reserved words.
my %words = (
    'goto'   => 'GOTO',
    'if'     => 'IF',
    'unless' => 'UNLESS',
    'null'   => 'PNULL',
    'int'    => 'INTV',
    'num'    => 'FLOATV',
    'pmc'    => 'PMCV',
    'string' => 'STRINGV',
);

# Multi-character operators (plus '.', '<' and '>') that have named tokens.
my %lexemes = (
    '=>'   => 'ADV_ARROW',
    '<<'   => 'SHIFT_LEFT',
    '>>'   => 'SHIFT_RIGHT',
    '>>>'  => 'SHIFT_RIGHT_U',
    '&&'   => 'LOG_AND',
    '||'   => 'LOG_OR',
    '~~'   => 'LOG_XOR',
    '<'    => 'RELOP_LT',
    '<='   => 'RELOP_LTE',
    '>'    => 'RELOP_GT',
    '>='   => 'RELOP_GTE',
    '=='   => 'RELOP_EQ',
    '!='   => 'RELOP_NE',
    '**'   => 'POW',
    '.'    => 'DOT',
    '+='   => 'PLUS_ASSIGN',
    '-='   => 'MINUS_ASSIGN',
    '*='   => 'MUL_ASSIGN',
    '/='   => 'DIV_ASSIGN',
    '%='   => 'MOD_ASSIGN',
    '//'   => 'FDIV',
    '//='  => 'FDIV_ASSIGN',
    '&='   => 'BAND_ASSIGN',
    '|='   => 'BOR_ASSIGN',
    '~='   => 'BXOR_ASSIGN',
    '>>='  => 'SHR_ASSIGN',
    '<<='  => 'SHL_ASSIGN',
    '>>>=' => 'SHR_U_ASSIGN',
    '.='   => 'CONCAT_ASSIGN',
);