#============================================================================ # # Text::MetaText # # DESCRIPTION # Perl 5 module to process template files, featuring variable # substitution, file inclusion, conditional operations, print # filters and formatting, etc. # # AUTHOR # Andy Wardley # # COPYRIGHT # Copyright (C) 1996-1998 Andy Wardley. All Rights Reserved. # # This module is free software; you can redistribute it and/or # modify it under the terms of the Perl Artistic Licence. # #---------------------------------------------------------------------------- # # $Id: MetaText.pm,v 0.22 1998/09/01 11:23:14 abw Exp abw $ # #============================================================================ package Text::MetaText; use strict; use FileHandle; use Date::Format; use vars qw( $VERSION $FACTORY $ERROR ); use Text::MetaText::Factory; require 5.004; #======================================================================== # ----- CONFIGURATION ----- #======================================================================== $VERSION = sprintf("%d.%02d", q$Revision: 0.22 $ =~ /(\d+)\.(\d+)/); $FACTORY = 'Text::MetaText::Factory'; # debug level constants (debugging will get nicer one day RSN) use constant DBGNONE => 0; # no debugging use constant DBGINFO => 1; # information message only use constant DBGCONF => 2; # configuration details use constant DBGPREP => 4; # show pre-processor operations use constant DBGPROC => 8; # show process operation use constant DBGPOST => 16; # show post-process operation use constant DBGDATA => 32; # show data elements (parameters) use constant DBGCONT => 64; # show content of blocks use constant DBGFUNC => 128; # private method calls use constant DBGEVAL => 256; # show conditional evaluation steps use constant DBGTEST => 512; # test code use constant DBGALL => 1023; # all debug information my $DBGNAME = { 'none' => DBGNONE, 'info' => DBGINFO, 'config' => DBGCONF, 'preproc' => DBGPREP, 'process' => DBGPROC, 'postproc' => DBGPOST, 'data' => DBGDATA, 'content' => DBGCONT, 'function' => DBGFUNC, 'evaluate' => DBGEVAL, 'test' => DBGTEST, 'all' => DBGALL, }; #======================================================================== # ----- PUBLIC METHODS ----- #======================================================================== #======================================================================== # # new($cfg) # # Module constructor. Reference to a hash array containing configuration # options may be passed as a parameter. This is passed off to # _configure() for processing. # # Returns a reference to a newly created Text::MetaText object. # #======================================================================== sub new { my $class = shift; my $self = {}; bless $self, $class; $self->_configure(@_); return $self; } #======================================================================== # # process_file($file, \%tags) # # Public method for processing files. Calls _parse_file($file) to # parse and load the file into the symbol table (indexed by $file) # and then calls $self->_process($file, $tags) to process the symbol # table entry and generate output. The optional $tags parameter may be # used to refer to a hash array of pre-defined variables which should be # used when processing the file. # # Returns the result of $self->_process($file, $tags) which may be undef # to indicate a processing error. May also return undef to indicate a # parse error. On success, a text string is returned which contains the # output of the process stage. # #======================================================================== sub process_file { my $self = shift; my $file = shift; $self->_DEBUG(DBGFUNC, "process_file($file, %s)\n", join(", ", @_)); # parse the file into the symbol table if it's not already there unless ($self->_symbol_defined($file)) { return undef unless defined $self->_parse_file($file); } # call _process to do the real processing and implicitly return result $self->_process($file, @_); } #======================================================================== # # process_text($text, \%tags) # # Public method for processing text strings. Calls _parse_text($text) to # parse the string and return a reference to an anonymous array, $block, # which represents the parsed text string, separated by newlines. This # is then passed to $self->_process($block, @_) along with any other # parameters passed in to process_text(), such as $tags which is a # reference to a hash array of pre-defined variables. # # Returns the result of $self->_process($block, $tags) which may be undef # to indicate a processing error. May also return undef to indicate a # parse error. On success, a text string is returned which contains the # output of the process stage. # #======================================================================== sub process_text { my $self = shift; my $text = shift; my $block; $self->_DEBUG(DBGFUNC, "process_text($text, ", join(", ", @_), ")\n"); # parse the text and store the returned block array return undef unless defined($block = $self->_parse_text($text)); # call _process to do the real processing and implicitly return result $self->_process($block, @_); } #======================================================================== # # process($file, \%tags) # # Alias for 'process_file(@_)' which is provided for backward # compatibility with older MetaText versions. # #======================================================================== sub process { my $self = shift; $self->process_file(@_); } #======================================================================== # # declare($input, $name) # # Public method which allows text blocks and pre-compiled directive # arrays to be installed in the symbol table for subsequent use in # %% INCLUDE %% directives. # # In the simplest case, $input is a text string (i.e. any scalar) which # may contain embedded MetaText directives. This is parsed using the # _parse_text($input, $name) method which creates a parsed directive # list which is subsequently installed in the symbol table, indexed by # $name. Subsequent directives of the form "%% INCLUDE $name %%" will # then correctly resolve the cached contents parsed from the text string. # # $input may also be a reference to an array of text strings and/or # MetaText directive objects. These are instances of the # Text::MetaText::Directive class, or sub-classes thereof. If you know # how to instantiate directive objects directly, then you can store # "pre-compiled" blocks straight into the symbol table using this method. # This can significantly speed up processing times for complex, # dynamically contructed blocks by totally elimiating the parsing stage. # # The MetaText Directive class will shortly be updated (beyond 0.2) # to make this process easier. At that point, the Directive documentation # will updated to better explain this process. In the mean time, don't # worry if you don't understand this - you're probably not one of the # two people who specifically needed this feature :-) # # Returns 1 if the symbol table entry was correctly defined. If a parse # error occurs (when parsing a text string), an error is raised and # undef is returned. # #======================================================================== sub declare { my $self = shift; my $input = shift; my $name = shift; my $ref; # is $input a reference of some kind? if ($ref = ref($input)) { # $input may be an array ref of text/directives $ref eq 'ARRAY' && do { # get a symbol table entry reference my $symtabent = $self->_symbol_entry($name); # clear any existing symbol table entry and push new content splice(@$symtabent, 0) if scalar @$symtabent; push(@$symtabent, @$input); # no problem return 1; }; # $input may (in the future) be other kinds of refs... $self->_error("Invalid input reference passed to declare()"); } else { # $input is not a reference so we assume it is text; we call # _parse_text($input, $name) to handle it but we do *not* # directly propagate the return value which is a direct reference # to the symbol table entry; data encapsulation and all that return $self->_parse_text($input, $name) ? 1 : undef; } } #======================================================================== # # error() # # Public method returning contents of internal ERROR string. # #======================================================================== sub error { my $self = shift; return $self->{ ERROR }; } #======================================================================== # ----- PRIVATE METHODS ----- #======================================================================== #======================================================================== # # _configure($cfg) # # Configuration method which examines the elements in the hash array # referenced by $cfg and sets the object's internal state accordingly. # Errors/warnings are reported via $self->_warn(); # #======================================================================== sub _configure { my $self = shift; my $cfg = shift; # initialise class data members $self->{ SYMTABLE } = {}; $self->{ LINES } = []; $self->{ ERROR } = ''; # error string (not ERRORFN!) # set configuration defaults $self->{ DEBUGLEVEL } = DBGNONE; # DEBUG mask $self->{ MAGIC } = [ '%%', '%%' ]; # directive delimiters $self->{ MAXDEPTH } = 32; # maximum recursion depth $self->{ LIB } = ""; # library path for INCLUDE $self->{ ROGUE } = {}; # how to handle rogue directives $self->{ CASE } = 0; # case sensitivity flag $self->{ CASEVARS } = {}; # case sensitive variables $self->{ CHOMP } = 0; # chomp straggling newlines $self->{ TRIM } = 1; # trim INCLUDE leading/trailing newlines $self->{ EXECUTE } = 0; # execute SUBST as function? $self->{ DELIMITER } = ','; # what splits a list? $self->{ FILTER } = { # pre-defined filters 'sr' => sub { my $m1 = $_[2] || ''; my $m2 = $_[3] || ''; $_[1] =~ s/$m1/$m2/g; $_[1]; }, 'escape' => sub { my $cm = $_[2] || ''; $_[1] =~ s/($cm)/\\$1/g; $_[1]; }, }; # the config hash array reference, $cfg, may contain a number of # different config options. These are examined case-insensitively # (but converted to UPPER CASE when stored) and, depending on the # option, tested for correctness, manipulated or massaged in some # way; invalid options generate a warning. return unless defined $cfg; # check a hash ref was supplied as $cfg unless (ref($cfg) eq 'HASH') { $self->_warn(ref($self) . "->new expects a hash array reference\n"); return; }; foreach (keys %$cfg) { # set simple config values (converting keyword to UPPER case) /^(MAXDEPTH|LIB|DELIMITER|CASE|CHOMP|TRIM|EXECUTE)$/i && do { $self->{ "\U$_" } = $cfg->{ $_ }; next; }; # add any user-defined print filters to the pre-defined ones /^FILTER$/i && do { my $filter; foreach $filter (keys %{$cfg->{ $_ }}) { $self->{ "\U$_" }->{ $filter } = $cfg->{ $_ }->{ $filter }; } next; }; # debuglevel is defined as a series of non-word delimited words # which index into the $DBGNAME hash ref for values /^DEBUGLEVEL$/i && do { foreach (split(/\W+/, $cfg->{ $_ })) { $self->_warn("Invalid debug option: $_\n"), next unless defined($DBGNAME->{ $_ }); # logically OR in the new debug value $self->{ DEBUGLEVEL } |= $DBGNAME->{ $_ }; } next; }; # ROGUE defines how unrecognised (rogue) directives should # be handled. /^ROGUE$/i && do { # create a hash reference of valid ROGUE options and # print a warning message about invalid options foreach my $rogue (split(/\W+/, $cfg->{ $_ })) { if ($rogue =~ /^warn|delete$/i) { $self->{ ROGUE }->{ uc $rogue } = 1; } else { $self->_warn("Invalid rogue option: \L$_\n"); } } next; }; # CASEVARS are those variables which don't get folded to lower # case when case sensitivity is turned off. This is useful for # metapage which likes to define some "system" variables in # UPPER CASE such as FILETIME, FILENAME, etc. /^CASEVARS$/i && do { if (ref($cfg->{ $_ }) eq 'ARRAY') { foreach my $var (@{ $cfg->{ $_ } }) { $self->{ CASEVARS }->{ $var } = 1; } } else { $self->_warn("CASEVARS option expects an array ref\n"); } next; }; # MAGIC needs a little processing to convert to a 2 element # ARRAY ref if a single string was specified (i.e. for both) /^MAGIC$/i && do { if (ref($cfg->{ $_ }) eq 'ARRAY') { $self->{ MAGIC } = $cfg->{ $_ }; } else { # create a 2-element array reference $self->{ MAGIC } = [ ($cfg->{ $_ }) x 2 ]; } next; }; # set ERROR/DEBUG handling function, checking for a CODE reference # NOTE: error function is stored internally as 'ERRORFN' and not as # 'ERROR' which is the object error status (backwards compatability). /^(ERROR|DEBUG)(FN)?$/i && do { # check this is a code reference $self->_warn("Invalid \L$_\E function\n"), next unless ref($cfg->{ $_ }) eq 'CODE'; $self->{ uc $1 . "FN" } = $cfg->{ $_ }; next; }; # FACTORY must contain a reference to a $FACTORY class or # derivation of same /^FACTORY$/i && do { $self->_warn("Invalid factory object"), next unless UNIVERSAL::isa($cfg->{ $_ }, $FACTORY); $self->{ FACTORY } = $cfg->{ $_ }; next; }; # warn about unrecognised parameter $self->_warn("Invalid configuration parameter: $_\n"); } # DEBUG code if ($self->{ DEBUGLEVEL } & DBGCONF) { $self->_DEBUG(DBGCONF, "$self Version $VERSION\n"); foreach (keys %$self) { $self->_DEBUG(DBGDATA, " %-10s => %s\n", $_, $self->{ $_ }); } } } #======================================================================== # # _parse_file($file) # # Attempts to locate a file with the filename as specified in $file. # If the filename starts with a '/' or '.', it is assumed to be an absolute # file path or one relative to the current working directory. In these # cases, no attempt to look for it outside of its specified location is made. # Otherwise, the directories specified in the LIB entry in the config hash # array are searched followed by the current working directory. If the file # is found, a number of member data items are initialised, the file is # opened and then _parse($file) is called to parse the file. # # Returns the result from _parse($file) or undef on failure. # #======================================================================== sub _parse_file { my $self = shift; my $file = shift; my ($dir, $filepath); $self->_DEBUG(DBGFUNC, "_parse_file($file)\n"); # default $filepath to $file (may be an absolute path) $filepath = $file; # file is relative to $self->{ LIB } unless it starts '/' or '.' if (defined($self->{ LIB }) && $filepath !~ /^[\/\.]/) { foreach $dir (split(/[|;:,]/, $self->{ LIB }), '.') { # construct a full file path $filepath = $dir; $filepath .= '/' unless ($filepath =~ /\/$/); $filepath .= $file; # test if the file exists last if -f $filepath; } } # open file (may still fail if above loop dropped out the bottom) unless (defined($self->{ FILE } = new FileHandle $filepath)) { $self->_error("$filepath: $!"); return undef; } $self->_DEBUG(DBGINFO, "loading file: $filepath\n"); # initialise file stats $self->{ LINENO } = 0; # no of lines read from _get_line(); $self->{ PUTBACK } = 0; # no of lines put back via _unget_line(); $self->{ FILENAME } = $file; $self->{ FILEPATH } = $filepath; $self->{ INPUT } = "$file"; # used for error reporting # call _parse($file) and implicitly return result $self->_parse($file); } #======================================================================== # # _parse_text($text, $symbol) # # Initialises the text member data so that _get_line() can read from it # and then calls _parse() to parse the text contents. If $symbol is # defined it is used as the symbol name which is then stored in the # symbol table. If $symbol is undefined, the block remains anonymous. # # Returns the result from _parse(). # #======================================================================== sub _parse_text { my $self = shift; my $text = shift; my $symbol = shift; # may be undef $self->_DEBUG(DBGFUNC, "_parse_text($text, ", defined $symbol ? $symbol : "", ")\n"); # set text string and initialise stats $self->{ LINENO } = 0; # no of lines read from _get_line(); $self->{ PUTBACK } = 0; # no of lines put back via _unget_line(); $self->{ TEXT } = $text; $self->{ INPUT } = "text string"; # used for error reporting # call _parse() and implicitly return result $self->_parse($symbol); } #======================================================================== # # _parse($symbol) # # The _parse() method reads the current input stream which may originate # from a file (_parse_file($file)) or a text string (_parse_text($text)). # The contents are split into chunks of plain text or MetaText directives # (enclosed by the MAGIC tokens). Text chunks are pushed directly onto # an output list, while directives are parsed and blessed into a directive # class before being pushed out. A reference to the output list is # returned. If a symbol name is passed as the first parameter to parse(), # then a corresponding entry in the $self->{ SYMTABLE } hash is created # to reference this list. # Processing continues until EOF is reached or an %% END(BLOCK|IF)? %% # directive is encountered. # # Blocks encountered that are bounded by a matched pair of %% BLOCK name %% # ... %% ENDBLOCK %% directives will cause a recursive call to # $self->_parse($blockname) to be made to handle the block definition for # the sub-block. Block definitions can theoretically be nested indefinately # although in practice, the process ends when an upper recursion limit is # reached ($self->{ MAXDEPTH }). To this effect, $depth is used to # internally indicate the current recursion depth to each instance. # #======================================================================== sub _parse { my $self = shift; my $symbol = shift; # may be undef - i.e. anonymous symbol my $depth = shift || 1; my ($magic1, $magic2); my ($line, $nextline); my ($symtabent, $factory, $directive); $self->_DEBUG(DBGFUNC, "_parse(%s)\n", defined $symbol ? $symbol : ""); # check for excessive recursion if ($depth > $self->{ MAXDEPTH }) { $self->_error("Maximum recursion exceeded in _parse()"); return undef; } # get a local copy of the MAGIC symbols for efficiency ($magic1, $magic2) = @{ $self->{ MAGIC } }; # get a symbol table entry reference (an undefined $symbol causes # an anonymous array ref to be returned). $symtabent = $self->_symbol_entry($symbol); # clear any existing symbol table entry; this doesn't affect caching, # BTW because _parse() only gets called when reload is necessary splice(@$symtabent, 0) if scalar @$symtabent; # get a reference to the factory object used to create directives return undef unless $factory = $self->_factory(); # # main parsing loop begineth here # READLINE: while (defined($line = $self->_get_line())) { # look to see if there is a directive in the line while ($line =~ / (.*?) # anything preceeding a directive $magic1 # opening directive marker \s* # whitespace (.*?) # directive contents \s* # whitespace ( ($magic2) # closing directive marker (.*) # rest of the line )? # directive may not be terminated $ # EOL so it all gets eaten /sx) { # # if the directive terminating symbol ($magic2) wasn't # found in the line then it suggests that the directive # continues onto the next line, so we append the next # line and try again. # unless ($4) { # if we can't read another line, tack on the # magic token to avoid a dangling directive unless (defined($nextline = $self->_get_line())) { $nextline = $magic2; $self->_warn("Closing directive tag missing\n"); } chomp($line); # add a space and the next line $line .= " $nextline"; next; } # # at this point, we have a line that has a complete directive # ($2) enclosed within it, perhaps with leading ($1) and # trailing ($5) text # # push any preceding text into the output list push(@$symtabent, $1) if length $1; # anything coming after the directive gets re-queued. # CHOMP can be set to remove straggling newlines $self->_unget_line($5) unless $self->{ CHOMP } && $5 eq "\n"; $line = ""; if (defined $2) { # get the create a new Text::MetaText::Directive object $directive = $factory->create_directive($2); # check everything worked OK. eval? bletch! unless (defined $directive) { $self->_parse_error($factory->error()); return undef; } my $tt = "Directive created:\n"; foreach (keys %$directive) { $tt .= sprintf(" %-16s => %s\n", $_, $directive->{ $_ }); } $tt .= " params:\n"; foreach (keys %{ $directive->{ PARAMS } || { } }) { $tt .= sprintf(" %-16s => %s\n", $_, $directive->{ PARAMS }->{ $_ }); } $self->_DEBUG(DBGTEST, $tt); # # some specialist processing required depending on # $directive->{ TYPE } # # END(BLOCK|IF)? marks the end of a defined block $directive->{ TYPE } =~ /^END(BLOCK|IF)?$/ && do { # save a copy of the tag that ended this block # so that the calling method can check it $self->{ ENDTAG } = $directive->{ TYPE }; # return the symbol table list return $symtabent; }; # BLOCK directive defines a sub-block $directive->{ TYPE } eq 'BLOCK' && do { # clear ENDTAG data $self->{ ENDTAG } = ""; # we recursively call $self->_parse() to parse the # block and return a reference to the symbol table # entry; my $block = $self->_parse( $directive->{ IDENTIFIER }, $depth + 1); # check comething was returned return undef unless defined $block; # test that the directive that terminated the block # was END(BLOCK)? unless ($self->{ ENDTAG } =~ /^END(BLOCK)?$/) { $self->_parse_error("ENDBLOCK expected"); return undef; } # if the 'TRIM' option is defined, we should remove # any leading newline and the final newline from the # last line. if (defined $directive->{ TRIM } ? $directive->{ TRIM } : $self->{ TRIM }) { shift @$block if $block->[0] eq "\n"; chomp($block->[ $#{ $block } ]); } # if the 'PRINT' option was defined, we convert the # BLOCK directive to an INCLUDE and push it onto the # symbol table so that it gets processed and a copy # of the BLOCK gets pushed to the output if (defined($directive->{ PRINT })) { $directive->{ TYPE } = 'INCLUDE'; push(@$symtabent, $directive); } # loop to avoid directive getting (re-)pushed below next; }; # push the directive onto the symbol table list push(@$symtabent, $directive); } # if (defined($2)) } # while ($line =~ ... # anything remaining in $line must be plain text push(@$symtabent, $line) if length($line); } # READLINE: while... # return a reference to the 'compiled' symbol table entry $symtabent; } #======================================================================== # # _process($symbol, \%tags, $depth) # # $symbol is a scalar holding the name of a known symbol or a reference # to an array which contains the nodes for an anonymous symbol. In the # former case, the symbol is referenced from the symbol table by calling # $self->_symbol_entry($symbol). In the latter case, the method simply # iterates through the elements of the $symbol array reference. # # Each element in the symbol table entry array is expected to be a simple # scalar containing plain text or a MetaText directive - an instance of # the Text::MetaText::Directive class. Plain text is pushed straight # through to an output queue. Directves are processed according to # their type (e.g. INCLUDE, DEFINE, SUBST, etc) and the resulting output # is pushed onto the output queue. # # The method returns a concatenation of the output list or undef on # error. # #======================================================================== sub _process { my $self = shift; my $symbol = shift; my $tags = shift || {}; my $depth = shift || 1; my ($symtabent, $factory, $directive, $item, $type, $space); my ($ident); my $proctext; my @output = (); $self->_DEBUG(DBGFUNC, "_process($symbol, $tags, $depth)\n"); # check for excessive recursion if ($depth > $self->{ MAXDEPTH }) { $self->_error("Maximum recursion exceeded"); return undef; } # $symbol may be a reference to an anonymous block array... if (ref($symbol) eq 'ARRAY') { $symtabent = $symbol; } # ...or a named symbol which may or may not have been pre-parsed else { # check the symbol has an entry in the symbol table unless ($self->_symbol_defined($symbol)) { $self->_error("$symbol: no such block defined"); return undef; } $symtabent = $self->_symbol_entry($symbol); } # get a reference to the factory object and call directive_type() # to determine the kind of Directive objects it creates return undef unless $factory = $self->_factory(); $directive = $factory->directive_type(); # # The symbol table entry is an array reference passed explicitly in # $symbol or retrieved by calling $self->_symbol_entry($symbol); # Each element in the array can be either a plain text string or an # instance of the directive class created by the factory object. # The former represent normal text blocks in the processed file, the # latter represent pre-parsed MetaText directives (see _parse()) that # have been created by the factory object. The factory provides the # directive_type() method for determining the class type of these # objects. A directive will contain some of the following elements, # based on the directive type and other data defined in the directive # block: # # $directive->{ TYPE } # directive type: INCLUDE, DEFINE, etc # $directive->{ IDENTIFIER } # target, i.e. INCLUDE # $directive->{ PARAMS } # hash ref of variables defined # $directive->{ PARAMSTR } # original parameter string # $directive->{ IF } # an "if=..." conditional # $directive->{ UNLESS } # ditto "unless=..." # $directive->{ DELIMITER } # delimiter string (see _evaluate()) # $directive->{ FILTER } # print filter name and params # $directive->{ FORMAT } # print format # # process each each line from the block foreach $item (@$symtabent) { # get rid of the non-directive cases first... unless (UNIVERSAL::isa($item, $directive)) { # return content if we find the end-of-content marker return join("", @output) if $item =~ /^__(MT)?END__$/; # not a directive - so just push output and loop push(@output, $item); next; } # examine any conditionals (if/unless) if defined if ($item->{ HAS_CONDITION }) { # test any "if=" statement... if (defined $item->{ IF }) { my $result = $self->_evaluate($item->{ IF }, $tags, $item->{ DELIMITER } || $self->{ DELIMITER }); next unless defined($result) && $result > 0; } # ...and/or any "unless=" statement if (defined $item->{ UNLESS }) { my $result = $self->_evaluate($item->{ UNLESS }, $tags, $item->{ DELIMITER } || $self->{ DELIMITER }); next if defined($result) && $result != 0; } } # we take a copy of the directive TYPE and IDENTIFIER (operand) $type = $item->{ TYPE }; $ident = $item->{ IDENTIFIER }; #------------------------------------ # switch ($type) # $type eq 'DEFINE' && do { # $tags is a hash array ref passed in to _process(). We must # clone it before modification in case we should accidentally # update the caller's hash. $tags = { %$tags }; # merge in parameters defined within the INCLUDE directive $self->_integrate_params($tags, $item->{ PARAMS }); next; }; $type eq 'INCLUDE' && do { # an INCLUDE identifier is allowed to contain variable # references which must be interpolated. $ident = $self->_interpolate($ident, $tags); # clone the existing tags my $newtags = { %$tags }; # merge in parameters defined within the INCLUDE directive $self->_integrate_params($newtags, $item->{ PARAMS }); # process the INCLUDE'd symbol and check return $proctext = $self->process_file($ident, $newtags, $depth + 1); return undef unless defined $proctext; # push text onto output list, post-processing it along the way # if $self->{ HAS_POSTPROC } is true (i.e. has filter/format) push(@output, $item->{ HAS_POSTPROC } ? $self->_post_process($item, $proctext) : $proctext); next; }; $type eq 'SUBST' && do { # call _substitute to handle token substitution $proctext = $self->_substitute($item, $tags); if (defined($proctext)) { $proctext = $self->_post_process($item, $proctext) if $item->{ HAS_POSTPROC }; } else { # unrecognised token $self->_warn("Unrecognised token: $item->{ IDENTIFIER }\n") if defined $self->{ ROGUE }->{ WARN }; # resolve nothing if 'delete' is defined as a ROGUE option $proctext = $self->{ ROGUE }->{ DELETE } ? "" : $self->{ MAGIC }->[ 0 ] # rebuild directive . " " . $item->{ PARAMSTR } . " " . $self->{ MAGIC }->[ 1 ]; } push(@output, $proctext); next; }; # default: invalid directive; this shouldn't happen $self->_warn("Unrecognise directive: $type\n") # # switch ($type) #------------------------------------ } # join output tokens and return as a single line join("", @output); } #======================================================================== # # _get_line() # # Returns the next pending line of text to be processed from the input # file or text string. If there are no pending lines already in the # queue, it reads a line of text from the file handle, $self->{ FILE }. # If $self->{ FILE } is undefined, it looks at $self->{ TEXT }, splits # the contents into lines and pushes them onto the pending line list. # The next pending line in the list can then be returned. # # Return a string representing the next input line or undef if no further # lines are available (at EOF for example). # #======================================================================== sub _get_line { my $self = shift; $self->_DEBUG(DBGFUNC, "_get_line() (%s #%d)\n", $self->{ INPUT }, $self->{ LINENO } + 1); # if there are no lines pending, we try to add some to the queue unless (@{ $self->{ LINES } }) { if (defined $self->{ FILE }) { # read from the file push(@{ $self->{ LINES } }, $self->{ FILE }->getline()); # close file if done $self->{ FILE } = undef if $self->{ FILE }->eof(); } elsif (defined $self->{ TEXT }) { # split from the text line push(@{ $self->{ LINES } }, split(/^/m, $self->{ TEXT })); $self->{ TEXT } = undef; } # no default } # LINENO is incremented to indicate that another line has been read, # unless PUTBACK indicates that there are requeued lines. if ($self->{ PUTBACK }) { $self->{ PUTBACK }--; } else { $self->{ LINENO }++; } # return the next token (may be undef to indicate end of stream) return shift(@{ $self->{ LINES } }); } #======================================================================== # # _unget_line($line) # # Unshifts the specified line, $line, onto the front of the pending # lines queue. Does nothing if $line is undefined. Effectively the # complement of _get_line(). The PUTBACK variable variable is # incremented. The _get_line() method uses this as an indication that # the line is re-queued and decrements PUTBACK instead of incrementing # LINENO as per usual. # #======================================================================== sub _unget_line { my $self = shift; my $line = shift; return unless defined $line; my $safeline; ($safeline = $line) =~ s/%/%%/g; $self->_DEBUG(DBGFUNC, "_unget_line(\"$safeline\") (#%d)\n", $self->{ LINENO } - 1); # increment PUTBACK to indicate there are re-queued lines $self->{ PUTBACK }++; # unshift (defined) line onto front of list unshift(@{ $self->{ LINES } }, $line); } #======================================================================== # # _factory() # # Returns a reference to the factory object stored in $self->{ FACTORY }. # If this is undefined, an attempt is made to instantiate a factory # object from the default class, $FACTORY, which is then stored in the # $self->{ FACTORY } hash entry. # # Returns a reference to the factory object. On failure, undef is returned # and a warning is issued via _warn(). # #======================================================================== sub _factory { my $self = shift; # create a default factory if one doesn't already exist unless (defined $self->{ FACTORY }) { # $FACTORY is the default factory package $self->{ FACTORY } = $FACTORY->new() or $self->_error( "Factory construction failed: " . "" ); } # return factory reference $self->{ FACTORY }; } #======================================================================== # # _symbol_name($symbol) # # Returns the name by which $symbol might be referenced in the symbol # table. Applies case folding (to lower case) unless CASE sensitivity # is set. # #======================================================================== sub _symbol_name { my $self = shift; my $symbol = shift; $self->_DEBUG(DBGFUNC, "_symbol_name($symbol)\n"); # convert symbol to lower case unless CASE sensitivity is set $symbol = lc $symbol unless $self->{ CASE }; return $symbol; } #======================================================================== # # _symbol_defined($symbol) # # Returns 1 if the symbol, $symbol, is defined in the symbol table or # 0 if not. # #======================================================================== sub _symbol_defined { my $self = shift; my $symbol = shift; $self->_DEBUG(DBGFUNC, "_symbol_defined($symbol)\n"); # call _symbol_name() to apply any name munging $symbol = $self->_symbol_name($symbol); # return 1 or 0 based on existence of symbol table entry return exists $self->{ SYMTABLE }->{ $symbol } ? 1 : 0; } #======================================================================== # # _symbol_entry($symbol) # # Returns a reference to the symbol table entry for $symbol. If there # is no corresponding symbol currently loaded in the table, the symbol # table entry is initiated to an empty array reference, [], and that # value is returned. This list can then be filled, via the reference, # to populate the symbol table entry. The symbol name, $symbol, may be # converted to lower case (via _symbol_name($symbol)) unless case # sensitivity ($self->{ CASE }) is set. # # Returns a reference to the array that represents the symbol table # entry for the specified entry. # #======================================================================== sub _symbol_entry { my $self = shift; my $symbol = shift; $self->_DEBUG(DBGFUNC, "_symbol_entry(%s)\n", defined $symbol ? $symbol : ""); # an undefined symbol gets an anonymous array return [] unless defined $symbol; # determine the real symbol name accounting for case folding $symbol = $self->_symbol_name($symbol); # create empty table entry for a new symbol $self->{ SYMTABLE }->{ $symbol } = [] unless defined $self->{ SYMTABLE }->{ $symbol }; # return reference to symbol table entry $self->{ SYMTABLE }->{ $symbol }; } #======================================================================== # # _variable_name($variable) # # Returns the name by which $symbol might be referenced. Removes any # extraneous leading '$' and folds to lower case unless CASE sensitivity # is set. # # Returns the (perhaps modified) variable name. # #======================================================================== sub _variable_name { my $self = shift; my $variable = shift; $self->_DEBUG(DBGFUNC, "_variable_name($variable)\n"); # strip leading '$' $variable =~ s/^\$//; # convert symbol to lower case unless CASE sensitivity is set $variable = lc $variable unless $self->{ CASE }; return $variable; } #======================================================================== # # _variable_value($variable, $tags) # # Returns the value associated with the variable as named in $variable. # $variable may be modified (by _variable_name()) which removes any # leading '$' and folding case unless $self->{ CASE } is set. The # resulting variable name is then used to index into $tags to return # the associated value. # # Returns the value from $tags associated with $variable or undef if not # defined. # #======================================================================== sub _variable_value { my $self = shift; my $variable = shift; my $tags = shift; $self->_DEBUG(DBGFUNC, "_variable_value($variable, $tags)\n"); # examine the CASEVARS which lists vars not for CASE folding return $tags->{ $variable } if (defined $self->{ CASEVARS }->{ $variable } && defined $tags->{ $variable }); # special case(s) return time() if $variable eq 'TIME'; # apply any case folding rules to the variable name $variable = $self->_variable_name($variable); # return the associated value return $tags->{ $variable }; } #======================================================================== # # _interpolate($expr, $tags) # # Examines the string expression, $expr, and attempts to replace any # elements within the string that relate to key names in the hash table # referenced by $tags. A simple "$variable" subsititution is identified # when separated by non-word characters # # e.g. "foo/$bar/baz" => "foo/" . $tags->{'bar'} . "/baz" # # Ambiguous variable names can be explicitly resolved using braces as per # Unix shell syntax. # # e.g. "foo${bar}baz" => "foo" . $tags{'bar'} . "baz" # # The function returns a newly constructed string. If $expr is a reference # to a scalar, the original scalar is modified and also returned. # #======================================================================== sub _interpolate { my $self = shift; my $expr = shift; my $tags = shift || {}; my ($s1, $s2); $self->_DEBUG(DBGFUNC, "_interpolate($expr, $tags)\n"); # if a reference is passed, work on the original, otherwise take a copy my $work = ref($expr) eq 'SCALAR' ? $expr : \$expr; # look for a "$identifier" or "${identifier}" and substitute # Note that we save $1 and $2 because they may get trounced during # the call to $self->_variable_value() $$work =~ s/ ( \$ \{? ([\w\.]+) \}? ) / ($s1, $s2) = ($1, $2); defined ($s2 = $self->_variable_value($2, $tags)) ? $s2 : $s1; /gex; # return modified string $$work; } #======================================================================== # # _integrate_params($tags, $params, $lookup) # # Attempts to incorporate all the variables in the $params hash array # reference into the current tagset referenced by $tags. Any embedded # variable references in the $params values will be interpolated using # the values in the $lookup hash. If $lookup is undefined, the $tags # hash is used. # # e.g. # if $params->{'foo'} = 'aaa/$bar/bbb' # then $tags->{'foo'} = 'aaa' . $lookup->{'bar'} . 'bbb' # #======================================================================== sub _integrate_params { my $self = shift; my $tags = shift || {}; my $params = shift || {}; my $lookup = shift || $tags; my ($v, $variable, $value); $self->_DEBUG(DBGFUNC, "_integrate_params($tags, $params, $lookup)\n"); # iterate through each variable in $params foreach $v (keys %$params) { # get the real variable name $variable = $self->_variable_name($v); # interpolate any variable values in the parameter value $value = $self->_interpolate($params->{ $v }, $lookup); # copy variable and value into new tagset $tags->{ $variable } = $value } } #======================================================================== # # _substitute($directive, $tags) # # Examines the SUBST directive referenced by $directive and looks to # see if the variable to which it refers ($directive->{ IDENTIFIER }) # exists as a key in the hash table referenced by $tags. # # If a relevant hash entry does not exist and $self->{ EXECUTE } is set # to a true value, _substitute attempts to run the directive name as a # class method, allowing derived (sub) classes to define member functions # that get called automagically by the base class. If $self->{ EXECUTE } # has a value > 1, it attempts to run a function in the main package with # the same name as the identifier. If all that fails, undef is returned. # #======================================================================== sub _substitute { my $self = shift; my $directive = shift; my $tags = shift; my $ident = $directive->{ IDENTIFIER }; my ($value, $fn); $self->_DEBUG(DBGFUNC, "_substitute($directive, $tags)\n"); # get the variable value if it is defined return $value if defined ($value = $self->_variable_value($ident, $tags)); # nothing more to do unless EXECUTE is true return undef unless $self->{ EXECUTE }; # extract the original parameter string my $prmstr = $directive->{ PARAMSTR } || ''; my $prmhash = { }; # create a new set of directive tags, interpolating any embedded vars $self->_integrate_params($prmhash, $directive->{ PARAMS }, $tags); # execute $ident class method if EXECUTE is defined and $ident exists if ($self->{ EXECUTE } && $self->can($ident)) { $self->_DEBUG(DBGINFO, "executing $self->$ident\n"); return $self->$ident($prmhash, $prmstr) } # if EXECUTE is set > 1, we try to run it as a function in the main # package. We examine the main symbol table to see if the function # exists, otherwise we return undef. return undef unless $self->{ EXECUTE } > 1; # get a function reference from the main symbol table local *glob = $main::{ $ident }; return undef unless defined($fn = *glob{ CODE }); $self->_DEBUG(DBGINFO, "executing main::$ident\n"); # execute the function and implicitly return result &{ $fn }($prmhash, $prmstr); } #======================================================================== # # _evaluate($expr, \%tags, $delimiter) # # Evaluates the specified expression, $expr, using the token values in # the hash array referenced by $tags. The $delimiter parameter may also # be passed to over-ride the default delimiter ($self->{ DELIMITER }) # which is used when splitting 'in' lists for evalutation # (e.g. if="name in Tom,Dick,Harry"). # # Returns 1 if the expression evaluates true, 0 if it evaluates false. # On error (e.g. a badly formed expression), undef is returned. # # NOTE: This method is ugly, slow and buggy. For most uses, it will do # the job admirably, but don't necessarily trust it to do 100% what you # expect if your expressions start to get very complicated. In # particular, multiple nested parenthesis may not evaluate with the # correct precedence, or indeed at all. The method has to parse and # evaluate the $expr string every time it is run. This will start to # slow your processing down if you do a lot of conditional tests. In # the future, it is likely to be compiled down to an intermediate form # to improve execution speed. # #======================================================================== sub _evaluate { my $self = shift; my $expr = shift; my $tags = shift; my $delim = shift || $self->{ DELIMITER }; my ($lhs, $rhs, $sub, $op, $result); # save a copy of the original expression for debug purposes my $original = $expr; # a hash table of comparison operators and associated functions my $compare = { '==' => sub { $_[0] eq $_[1] }, '=' => sub { $_[0] eq $_[1] }, '!=' => sub { $_[0] ne $_[1] }, '>=' => sub { $_[0] ge $_[1] }, '<=' => sub { $_[0] le $_[1] }, '>' => sub { $_[0] gt $_[1] }, '<' => sub { $_[0] lt $_[1] }, '=~' => sub { $_[0] =~ /$_[1]/ }, '!~' => sub { $_[0] !~ /$_[1]/ }, 'in' => sub { grep(/^$_[0]$/, split(/$delim/, $_[1])) }, }; # define a regex to match the comparison keys; note that alpha words # (\w+) must be protected by "\b" boundary assertions and that order # is extremely important (so as to match '>=' before '>', for example) my $compkeys = join('|', qw( \bin\b <= >= < > =~ !~ != == = )); # a hash table of boolean operators and associated functions my $boolean = { '&&' => sub { $_[0] && $_[1] }, '||' => sub { $_[0] || $_[1] }, '^' => sub { $_[0] ^ $_[1] }, 'and' => sub { $_[0] and $_[1] }, 'or' => sub { $_[0] or $_[1] }, 'xor' => sub { $_[0] xor $_[1] }, }; my $boolkeys = join('|', map { /^\w+$/ ? "\\b$_\\b" : "\Q$_" } keys %$boolean); # DEBUG code $self->_DEBUG(DBGFUNC, "_evaluate($expr, $tags)\n"); foreach (keys %$tags) { $self->_DEBUG(DBGEVAL | DBGDATA, " eval: %-10s -> %s\n", $_, $tags->{ $_ }); } # trounce leading and trailing whitespace foreach ($expr) { s/^\s+//; s/\s+$//g; } $self->_DEBUG(DBGEVAL, "EVAL: expr: [$expr]\n"); # throw back expressions already fully simplified; note that we evaluate # expressions as strings to avoid implicit true/false evaluation if ($expr eq '1' or $expr eq '0') { $self->_DEBUG(DBGEVAL, "EVAL: fully simplified: $expr\n"); return $expr; } # # fully expand all expressions in parenthesis # while ($expr =~ /(.*?)\(([^\(\)]+)\)(.*)/) { $lhs = $1; $sub = $2; $rhs = $3; # parse the parenthesised expression return undef unless defined($sub = $self->_evaluate($sub, $tags)); # build a new expression $expr = "$lhs $sub $rhs"; } # check there aren't any hanging parenthesis $expr =~ /[\(\)]/ && do { $self->_warn("Unmatched parenthesis: $expr\n"); return undef; }; # # divide expression by the first boolean operator # if ($expr =~ /(.*?)\s*($boolkeys)\s*(.*)/) { $lhs = $1; $op = $2; $rhs = $3; $self->_DEBUG(DBGEVAL, "EVAL: boolean split: [$lhs] [$op] [$rhs]\n"); # evaluate expression using relevant operator $result = &{ $boolean->{ $op } }( $lhs = $self->_evaluate($lhs, $tags), $rhs = $self->_evaluate($rhs, $tags) ) ? 1 : 0; $self->_DEBUG(DBGEVAL, "EVAL: bool: [$original] => [$lhs] [$op] [$rhs] = $result\n"); return $result; } # # divide expression by the first comparitor # $lhs = $expr; $rhs = $op = ''; if ($expr =~ /^\s*(.*?)\s*($compkeys)\s*(.*?)\s*$/) { $lhs = $1; $op = $2; $rhs = $3; $self->_DEBUG(DBGEVAL, "EVAL: compare: [$lhs] [$op] [$rhs]\n"); } # # cleanup, rationalise and/or evaluate left-hand side # # left hand side is automatically dereferenced so remove any explicit # dereferencing '$' character at the start $lhs =~ s/^\$//; # convert lhs to lower case unless CASE sensitive $lhs = lc $lhs unless $self->{ CASE }; $self->_DEBUG(DBGEVAL, "EVAL: expand lhs: \$$lhs => %s\n", $tags->{ $lhs } || ""); # dereference the lhs variable $lhs = $tags->{ $lhs } || 0; # # no comparitor implies lhs is a simple true/false evaluated variable # unless ($op) { $self->_DEBUG(DBGEVAL, "EVAL: simple: [$lhs] = %s\n", $lhs ? 1 : 0); return $lhs ? 1 : 0; } # # de-reference RHS of the equation ($comp) if it starts with a '$' # if ($rhs =~ s/^\$(.*)/$1/) { # convert variable name to lower case unless CASE sensitive $rhs = lc $rhs unless $self->{ CASE }; $self->_DEBUG(DBGEVAL, "EVAL: expand rhs: $rhs => %s\n", $tags->{ $rhs } || ""); # de-reference variables $rhs = $tags->{ $rhs } || 0; } else { $self->_DEBUG(DBGEVAL, "EVAL: rhs: [$rhs]\n"); } # remove surrounding quotes from rhs value foreach ($rhs) { s/^["']//; s/["']$//; } # force both LHS and RHS to lower case unless CASE sensitive unless ($self->{ CASE }) { $lhs = lc $lhs; $rhs = lc $rhs; } # # evaluate the comparison statement # $result = &{ $compare->{"\L$op"} }($lhs, $rhs) ? 1 : 0; $self->_DEBUG(DBGEVAL, "EVAL: comp: [%s] => [%s] [%s] [%s] = %s\n", $original, $lhs, $op, $rhs, $result); $result; } #======================================================================== # # _post_process($directive, $string) # # This function is called to post-process the output generated when # process() conducts a SUBST or an INCLUDE operation. The FILTER and # FORMAT parameters of the directive, $directive, are used to indicate # the type of post-processing required. # # Returns the processed string. # #======================================================================== sub _post_process { my $self = shift; my $directive = shift; my $line = shift; my $formats = { QUOTED => '"%s"', DQUOTED => '"%s"', SQUOTED => "'%s'", MONEY => "%P%.2f", # '%P' says "use printf() not time2str()" }; my ($pre, $post); my @lines; # DEBUG code if ($self->{ DEBUGLEVEL } & DBGFUNC) { my $dbgline = $line; $dbgline =~ s/\n/\\n/g; $dbgline =~ s/\t/\\t/g; substr($dbgline, 0, 16) = "..." if length $dbgline > 16; $dbgline = "\"$dbgline\""; $self->_DEBUG(DBGFUNC, "_post_process($directive, $dbgline)\n"); } $self->_DEBUG(DBGPOST, "Post-process: \n[$line]\n"); # no need to do anything if there's nothing to operate on return "" unless defined $line && length $line; # split into lines, accounting for a trailing newline which would # otherwise be ignored by split() @lines = split(/\n/, $line); push(@lines, "") if chomp($line); $self->_DEBUG(DBGPOST, " -> [%s]\n" , join("]\n [", @lines)); # see if the "FILTER" option is specified if (defined($directive->{ FILTER })) { # extract the filter name and parameters: () $directive->{ FILTER } =~ /([^(]+)(?:\((.*)\))?/; my $fltname = $1; # split filter parameters and remove enclosing quotes my @fltparams = split(/\s*,\s*/, $2 || ""); foreach (@fltparams) { s/^"//; s/"$//; } # is there a filter function with the name specified? if (ref($self->{ FILTER }->{ $fltname }) eq 'CODE') { $self->_DEBUG(DBGINFO, "filter: $fltname(%s)\n", join(", ", $fltname, @fltparams)); # deref filter code to speed up multi-line processing my $fltfn = $self->{ FILTER }->{ $fltname }; # feed each line through filter function foreach (@lines) { $pre = $_; $_ = &$fltfn($fltname, $_, @fltparams); $post = $_; if ($self->{ DEBUGLEVEL } & DBGPOST) { $self->_DEBUG(DBGDATA, "filter: [ $pre ]\n -> [ $post ]\n"); } } } else { $self->_warn("$fltname: non-existant or invalid filter\n"); } } # # if the "format=