# $Revision: #3 $$Date: 2005/08/31 $$Author: jd150722 $
######################################################################
#
# This program is Copyright 2003-2005 by Jeff Dutton.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of either the GNU General Public License or the
# Perl Artistic License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# If you do not have a copy of the GNU General Public License write to
# the Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
# MA 02139, USA.
######################################################################
package Parse::RandGen::Production;
require 5.006_001;
use Carp;
use Parse::RandGen qw($Debug);
use strict;
use vars qw($Debug);
######################################################################
#### Creators
sub new {
my $class = shift;
my $self = {
_conditions => [ ], # Ordered list of Conditions that must be satisfied for the production to be true
_action => undef, # Action to take if the production is satisfied
_rule => undef, # The Rule that this Production belongs to
_name => undef, # The name of the Production (most are anonymous, but Productions can be named if they need to be accessed later)
_number => undef, # The number of the Production in the Rule (which production is this 0...X)
#@_,
};
bless $self, ref($class)||$class;
# Optional named arguments can be passed. Any unknown named arguments are turned into object data members.
my @args = @_; # Arguments can override defaults or create new attributes in the object
my $numArgs = $#args + 1;
if ($numArgs == 1) {
$self->addCond(shift(@args));
} elsif ($numArgs) {
($numArgs % 2) and confess("%Error: new Production called with an odd number of arguments ($numArgs); arguments must be in named pairs (or a single Condition argument)!");
$self->set(@args);
}
my $rule = $self->{_rule};
(!defined($self->{_rule}) || UNIVERSAL::isa($rule, "Parse::RandGen::Rule"))
or confess("%Error: new Production was passed an unknown \"rule\" argument \"$rule\"!\n");
return($self);
}
######################################################################
#### Methods
sub set {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my @args = @_;
my $numArgs = $#args + 1;
($numArgs) or confess("%Error: Production::set() called with no arguments!");
($numArgs % 2) and confess("%Error: Production::set() called with an odd number of arguments ($numArgs); arguments must be in named pairs!");
while ($#args >= 0) {
my ($arg, $val) = (shift(@args), shift(@args));
if ($arg eq "cond") {
$self->addCond($val);
} elsif ($arg eq "action") {
$self->{_action} = $val;
} elsif ($arg eq "rule") {
UNIVERSAL::isa($val, "Parse::RandGen::Rule") or confess("%Error: Production::set() called with a bad \"rule\" argument ($val)!");
$self->{_rule} = $val;
} else {
# Unknown arguments become data members
$self->{$arg} = $val;
}
}
}
sub addCond {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my @args = @_;
while ($#args >= 0) {
my $val = shift(@args);
defined($val) or confess("%Error: Production::addCond(): condition is undefined!");
my $element = $val;
my $cond = undef;
my $valRef = ref($val);
if ($valRef) {
if ($valRef eq "Regexp") {
# Regular expression
$cond = Parse::RandGen::Regexp->new($element);
} else {
(UNIVERSAL::isa($val, "Parse::RandGen::Condition"))
or confess("%Error: The Production condition is a reference (ref=\"$valRef\"), but not a supported type!");
$cond = $val;
}
} elsif ($val =~ m/^\s* (\w+) (?: \( (.*?) \) )? \s*$/x) { # subrule(subargs)
my ($min, $max) = (1, 1);
$element = $1;
my $subargs = $2;
if (defined($subargs) && $subargs) {
if ($subargs eq "?" ) { $min = 0; $max = 1; } # ?
elsif ($subargs =~ m/^(s|\+)$/ ) { $min = 1; $max = undef; } # s or +
elsif ($subargs =~ m/^((s\?)|\*)$/ ) { $min = 0; $max = undef; } # s? or *
elsif ($subargs =~ /(\d+)(?:\.\.(\d+))?/ ) { $min = $1; $max = ($2 || $min); } # 2..3 or ..4 or 5..
else {
confess("%Error: The Production condition \"${val}\" has decoded to be a subrule, but the subargs are not understood (${subargs})!");
}
}
$cond = Parse::RandGen::Subrule->new($element, min=>$min, max=>$max);
} elsif ($element =~ $Parse::RandGen::Literal::ValidLiteralRE) {
# Must be a literal surrounded by single or double quotes
$element = Parse::RandGen::Literal::stripLiteral($element);
$cond = Parse::RandGen::Literal->new($element);
} else {
confess("%Error: The Production condition \"${val}\" has decoded to be a literal, but it doesn't look good!");
}
$cond->{_production} = $self;
push @{$self->{_conditions}}, $cond;
}
}
sub check {
my $self = shift or confess("%Error: Cannot call without a valid object!");
return "%Error: Production has no RandGen object!\n" unless $self->grammar();
my $grammarName = $self->grammar()->name();
my $err = "";
foreach my $cond (@{$self->{_conditions}}) {
next unless $cond->isSubrule();
my $subrule = $cond->subrule(); # Will be undef if there is a problem
my $subruleName = $cond->element();
next unless defined($subruleName); # Anonymous subrule
my $rule = $self->grammar()->rule($subruleName);
next if (defined($rule) && ($rule == $subrule)); # Everything is OK!
my $ruleName = $self->rule()->name(); # The name of the rule that this production belongs to...
$err .= "%Error: The \"${ruleName}\" rule references the subrule \"${subruleName}\", which is not defined in the \"${grammarName}\" grammar!\n" unless (defined($subrule));
}
return $err;
}
sub dump {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my $output = "";
foreach my $cond (@{$self->{_conditions}}) {
$output .= " " if $output;
$output .= $cond->dump();
}
$output .= $self->_dumpParseFunction();
return $output;
}
sub dumpHeir {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my $output = "";
foreach my $cond (@{$self->{_conditions}}) {
$output .= " " if $output;
$output .= $cond->dump();
}
return $output;
}
sub pick {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my %args = ( match=>1, # Default is to pick matching data
vals => { }, # Hash of values of various hard-coded sub-rules (by name)
@_ );
my @conds = $self->conditions();
my $badCond;
my $val = "";
if (!$args{match}) {
my @badConds;
foreach my $cond (@conds) {
next if ($cond->isQuantSupported() && $cond->zeroOrMore()); # Cannot corrupt
push(@badConds, $cond);
}
my $i = int(rand($#badConds+1));
$badCond = $badConds[$i];
}
for (my $i=0; $i <= $#conds; $i++) {
$val .= $conds[$i]->pick(%args, match=>($args{match} || ((defined($badCond) && ($badCond==$conds[$i]))?0:1)) );
}
return( $val );
}
# Returns true (1) if this production contains any of the rules specified by the "vals" argument
sub containsVals {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my %args = ( vals => { }, # Hash of values of various hard-coded sub-rules (by name)
@_ );
foreach my $cond ($self->conditions()) {
return 1 if $cond->containsVals(%args);
}
return 0;
}
######################################################################
#### Accessors
sub action {
my $self = shift or confess("%Error: Cannot call name() without a valid object!");
return $self->{_action};
}
sub rule { # Rule that this Production belongs to
my $self = shift or confess("%Error: Cannot call rule() without a valid object!");
return $self->{_rule};
}
sub name { # Name of the Production (optional)
my $self = shift or confess("%Error: Cannot call rule() without a valid object!");
return $self->{_name};
}
sub number { # Production number on its Rule (required if defined(rule()))
my $self = shift or confess("%Error: Cannot call rule() without a valid object!");
return $self->{_number};
}
sub grammar {
my $self = shift or confess("%Error: Cannot call grammar() without a valid object!");
my $grammar = $self->rule()->grammar() if defined($self->rule());
return $grammar;
}
sub conditions {
my $self = shift or confess("%Error: Cannot call conditions() without a valid object!");
return (@{$self->{_conditions}});
}
######################################################################
#### Private Functions
sub _dumpParseFunction {
my $self = shift or confess("%Error: Cannot call without a valid object!");
my $output = "\n\t\t\t {\t";
my $indent = "\n\t\t\t\t";
if (defined($self->{_action})) {
$output .= $self->{_action} . " }";
return $output;
}
# Determine whether this is a single terminal production or not
my @conds = @{$self->{_conditions}};
my $ind = 1; # Index 0 is the rule name
$output .= 'my $val=""; my $obj={val=>undef,offset=>$itempos[1]{offset}{from},len=>0,rules=>{}};';
foreach my $cond (@conds) {
$output .= $indent;
my $sName = $cond->isSubrule() ? $cond->subrule()->name() : ""; # Name of subrule, if subrule...
my $sKeep = $sName ? $cond->subrule()->{keep}||"" : ""; # Keep this subrule?
my $sParse = $sName ? $cond->subrule()->{parse}||"" : ""; # Parse this subrule (preserve heirarchy beneath it)
if ($cond->once()) {
#$output .= "if (ref(\$item[$ind])) { \$val.=\$item[$ind]->{val}; } else { \$val.=\$item[$ind]; }";
if ($sName) {
$output .= "\$val.=\$item[$ind]->{val};";
if ($sKeep eq "once") {
$output .= " \$obj->{rules}{$sName}=\$item[$ind];";
} elsif ($sKeep eq "all") {
$output .= " \$obj->{rules}{$sName}||=[]; push(\@{\$obj->{rules}{$sName}}, \$item[$ind]);";
}
if (!$sParse) { # Not adding a new level of parse, so flatten rules
$output .= "${indent}foreach my \$j (keys \%{\$item[$ind]->{rules}}) {"
."${indent}\tmy \$o=\$item[$ind]->{rules}{\$j};"
."${indent}\tif (ref(\$o) eq \"ARRAY\") { \$obj->{rules}{\$j}||=[]; push(\@{\$obj->{rules}{\$j}}, \$o); }"
."${indent}\telse { \$obj->{rules}{\$j}=\$o; } }";
}
} else {
$output .= "\$val.=\$item[$ind];";
}
} else {
#$output .= "foreach my \$i (\@{\$item[$ind]}) { if(ref(\$i)){ \$val.=\$i->{val}; } else { \$val.=\$i; }";
if ($sName) {
$output .= "foreach my \$i (\@{\$item[$ind]}) { \$val.=\$i->{val};";
if ($sKeep eq "once") {
$output .= " \$obj->{rules}{$sName}=\$i;";
} elsif ($sKeep eq "all") {
$output .= " \$obj->{rules}{$sName} ||= []; push(\@{\$obj->{rules}{$sName}}, \$i);";
}
if (!$sParse) { # Not adding a new level of parse, so flatten rules
$output .= "${indent}\tforeach my \$j (keys \%{\$i->{rules}}) {"
."${indent}\t\tmy \$o=\$i->{rules}{\$j};"
."${indent}\t\tif (ref(\$o) eq \"ARRAY\") { \$obj->{rules}{\$j}||=[]; push(\@{\$obj->{rules}{\$j}}, \$o); }"
."${indent}\t\telse { \$obj->{rules}{\$j}=\$o; } }";
}
$output .= " }";
} else {
$output .= "foreach my \$i (\@{\$item[$ind]}) { \$val.=\$i; }";
}
}
$ind++;
}
#$output .= " print(\$item[0],\" [\",\$itempos[1]{offset}{from},\"..\${thisoffset}]\\n\");";
(defined($self->rule()) and $self->rule()->name()) or confess("%Error: _dumpParseFunction(): Rule is not defined or the Rule is anonymous (no name)!");
my $ruleName = $self->rule()->name();
my $prodNum = $self->number();
($self->grammar()->rule($ruleName) == $self->rule()) or confess("%Error: Internal error! Cannot find our Rule \"$ruleName\" on our RandGen!");
#$output .= " \$thisparser->{local}{grammar}->rule(\"$ruleName\")->production($prodNum);";
$output .= $indent.'$obj->{val}=$val; $obj->{len}=length($val);';
$output .= ' $return=$obj; }';
return $output;
}
######################################################################
#### Package return
1;
__END__
=pod
=head1 NAME
Parse::RandGen::Production - Conditions for rule to match (and the action to take if it does)
=head1 DESCRIPTION
A Production defines a set of Conditions that must be satisfied for
a Rule to match input text. The Production consists of an ordered list
of Conditions (subrules, literals, and regexps) that must sequentially
match for the Production to match.
A rule matches if any one of its Productions match the input text.
In BNF notation, the relationship of Rules and Productions is:
rule1: production1
| production2
| production3
For example:
perlFuncCall: /&?/ identifier '(' argument(s?) ')'
| scalar '->' identifier '(' argument(s?) ')'
The two Productions in this example could respectively match:
"func()", "func(x, y)", or "&func(x)"
"$obj->member()"
The first Production in this example is a list of:
Parse::RandGen::Production->new(
cond => qr/&?/, # Regexp Condition - 0 or 1 '&' characters
cond => "indentifier", # Subrule Condition - exactly 1 "identifier" rule
cond => q{'('}, # Literal Condition - single '(' character
cond => "argument(s?)", # Subrule Condition - 0 or more "argument" rules
cond => ')', # Literal Condition - single ')' character
);
Be aware of the greediness of the underlying parsing mechanism. If a production consists of
subsequent conditions, such that the earlier ones can satisfy later ones, then they must
be combined into one condition represented by a regular expression. Regular expressions
can manage the greediness of their matching in order to get the desired effect.
identifier: /\w*/ /\d/ # The second condition can be met by the first
=head1 METHODS
=over 4
=item new
Creates a new Production. The arguments are all named pairs. The only required pair is "cond" => condition.
The Production can be named with the "name" argument (accessed by the name() accessor).
Any unknown named arguments are treated as user-defined fields. They are stored in the Condition hash ($cond->{}).
Parse::RandGen::Production->new( name => 'request',
cond => q{'Request:'},
cond => qr/(\s*\w+\s*[,$]+)/ );
=item rule
Returns the Parse::RandGen::Rule object that this Production belongs to.
=item grammar
Returns the Parse::RandGen::Grammar object that this Production belongs to (returns rule()->grammar()).
=item check
Checks the Production to verify that all subrules can be found in the RandGen.
=item conditions
Returns a list with the Production's Conditions.
=back
=head1 SEE ALSO
B<Parse::RandGen>,
B<Parse::RandGen::Rule>, and
B<Parse::RandGen::Condition>
=head1 AUTHORS
Jeff Dutton
=cut
######################################################################