# Copyright (C) 2007-2008, Parrot Foundation. # $Id: Grammar.pir 36833 2009-02-17 20:09:26Z allison $ =head1 NAME PCT::Grammar - base grammar with useful rules =head1 SYNOPSIS grammar MyGrammar is PCT::Grammar; rule abc { [ word | ] } rule quote { [ \' \' | \" \" ] } =head1 DESCRIPTION This file implements C, which is a basic grammar object with a few useful methods for parsing thrown in. =head2 Methods =over 4 =item panic(match [, message, ...] ) Throws an exception at the current point of the match, with message as part of the exception payload. The message doesn't end with a newline, then the line number and offset of the match are also included. =cut .namespace [ 'PCT';'Grammar' ] .sub 'onload' :anon :init :load load_bytecode 'PGE.pbc' load_bytecode 'PGE/Util.pbc' .local pmc p6meta p6meta = new 'P6metaclass' p6meta.'new_class'('PCT::Grammar', 'parent'=>'PGE::Grammar') $P0 = split '::', 'PCT::Grammar' $P0 = get_class $P0 $P1 = get_hll_global ['PGE';'Util'], 'die' $P0.'add_method'('panic', $P1) .return () .end =item FAILGOAL($goal [, 'dba'=>dba]) Invoked when goal matching fails to find the goal. Builds an appropriate error message and delegates the rest to C. =cut .sub 'FAILGOAL' :method .param string goal .param pmc options :named :slurpy .local string dba dba = options['dba'] if dba goto have_dba ## if no dba supplied, use the name of the caller sub $P0 = getinterp $P0 = $P0['sub';1] dba = $P0 have_dba: .tailcall self.'panic'("Unable to parse ", dba, "; couldn't find final ", goal) .end =item item() Here we overload the item() method from PGE::Match to throw an exception if a result object hasn't been set. =cut .sub 'item' :method .local pmc obj obj = getattribute self, '$!item' unless null obj goto end die "No result object" end: .return (obj) .end =item ww() Special-purpose rule to return true if we're in the middle of a word -- i.e., if the previous and next character are both "word characters". This is roughly equivalent to C<< >> except it's much quicker. In particular, C<< >> can be used by :sigspace rules to enforce whitespace between lexical words. =cut .include 'cclass.pasm' .sub 'ww' :method .local pmc mob .local int pos .local string target $P0 = get_hll_global ['PGE'], 'Match' (mob, pos, target) = $P0.'new'(self) if pos == 0 goto fail $I0 = is_cclass .CCLASS_WORD, target, pos unless $I0 goto fail $I1 = pos - 1 $I0 = is_cclass .CCLASS_WORD, target, $I1 unless $I0 goto fail mob.'to'(pos) fail: .return (mob) .end .sub 'string_literal' :method .param string stop .param pmc adverbs :slurpy :named ## create a new match object, get the new match position .local pmc mob .local int pos, lastpos, stoplen .local string target, escapechars (mob, pos, target) = self.'new'(self) lastpos = length target stoplen = length stop $S0 = substr stop, 0, 1 escapechars = concat "\\", $S0 ## leave space for close delimiter lastpos -= stoplen ## now initialize and loop through target literal_init: .local string literal, litchar literal = '' literal_loop: ## if we're beyond the last possible position, fail if pos > lastpos goto fail ## if ending delimiter, then we're done $S0 = substr target, pos, stoplen if $S0 == stop goto literal_end if pos >= lastpos goto fail ## get next character in literal litchar = substr target, pos, 1 inc pos ## add non-escape characters to literal if litchar != "\\" goto add_litchar ## look at the next character, if it's always escaped, add it and ## move on .local string escaped escaped = substr target, pos, 1 $I0 = index escapechars, escaped if $I0 < 0 goto interpolated_escape inc pos literal .= escaped goto literal_loop interpolated_escape: ## if not double-quoted delim, no interpolation if stop != '"' goto add_litchar litchar = escaped inc pos $I0 = index "abefnrt0xdo", litchar if $I0 < 0 goto add_litchar ## if it's one of "xdo", then handle that specially if $I0 >= 8 goto literal_xdo litchar = substr "\a\b\e\f\n\r\t\0", $I0, 1 goto add_litchar literal_xdo: ## handle \x, \d, and \o escapes. start by converting ## the 'o', 'd', or 'x' into 8, 10, or 16 (yes, it's hack ## but it works). Then loop through the characters that ## follow to compute the integer value of the codepoint, ## and add that codepoint to our literal. .local int base, codepoint, isbracketed base = index ' o d x', litchar codepoint = 0 $S0 = substr target, pos, 1 isbracketed = iseq $S0, '[' pos += isbracketed literal_xdo_char_loop: $S0 = substr target, pos, 1 $I0 = index '0123456789abcdef', $S0 if $I0 < 0 goto literal_xdo_char_end if $I0 >= base goto literal_xdo_char_end codepoint *= base codepoint += $I0 inc pos goto literal_xdo_char_loop literal_xdo_char_end: $S1 = chr codepoint concat literal, $S1 unless isbracketed goto literal_xdo_end if $S0 == ']' goto literal_xdo_end if $S0 != ',' goto fail inc pos codepoint = 0 goto literal_xdo_char_loop literal_xdo_end: pos += isbracketed goto literal_loop add_litchar: literal .= litchar goto literal_loop literal_end: mob.'to'(pos) mob.'result_object'(literal) .return (mob) fail: mob.'to'(-1) .return (mob) .end # Local Variables: # mode: pir # fill-column: 100 # End: # vim: expandtab shiftwidth=4 ft=pir: