package DBIx::MyParsePP::Rule; require Exporter; @ISA = qw(Exporter); @EXPORT = qw(MYPARSEPP_SHRINK_IDENTICAL MYPARSEPP_SHRINK_LEAFS MYPARSEPP_SHRINK_SINGLES MYPARSEPP_SHRINK_CHILDREN); use strict; 1; use constant RULE_NAME => 0; use constant MYPARSEPP_SHRINK_IDENTICAL => 1; use constant MYPARSEPP_SHRINK_LEAFS => 2; use constant MYPARSEPP_SHRINK_SINGLES => 4; use constant MYPARSEPP_SHRINK_CHILDREN => 8; sub new { my $class = shift; my $rule = bless (\@_, $class); return $rule; } sub getName { return $_[0]->[RULE_NAME]; } sub name { return $_[0]->[RULE_NAME]; } sub children { my @rule = @{$_[0]}; return @rule[1..$#rule]; } sub getChildren { my @rule = @{$_[0]}; return @rule[1..$#rule]; } sub toString { my $rule = shift; if ($#{$rule} > -1) { return join('', map { $rule->[$_]->toString(); } (1..$#{$rule}) ); } else { return undef; } } sub shrink { my ($parent, $flags) = @_; $flags = MYPARSEPP_SHRINK_IDENTICAL | MYPARSEPP_SHRINK_LEAFS | MYPARSEPP_SHRINK_SINGLES | MYPARSEPP_SHRINK_CHILDREN if not defined $flags; if (($#{$parent} == 0) && ($flags & MYPARSEPP_SHRINK_LEAFS)) { return undef; } elsif (($#{$parent} == 1) && ($flags & MYPARSEPP_SHRINK_SINGLES)) { if ($flags & MYPARSEPP_SHRINK_CHILDREN) { return $parent->[1]->shrink($flags); } else { return $parent->[1]; } } elsif ($flags & MYPARSEPP_SHRINK_CHILDREN) { my @new_children; foreach my $i (1..$#{$parent}) { my $child = $parent->[$i]->shrink($flags); if ( ($flags & MYPARSEPP_SHRINK_IDENTICAL) && (ref($child) eq 'DBIx::MyParsePP::Rule') && ($child->name() eq $parent->name()) ) { push @new_children, $child->children() if defined $child->children(); } else { push @new_children, $child if defined $child; } } my $new_rule = DBIx::MyParsePP::Rule->new($parent->name(), @new_children); return $new_rule->shrink($flags & ~MYPARSEPP_SHRINK_CHILDREN); } else { return $parent; } } sub extractInner { my $rule = shift; return undef if ($#{$rule} == 0); my @matches; foreach my $i (1..$#{$rule}) { my $extract = $rule->[$i]->extract(@_); next if not defined $extract; if (ref($extract) eq 'ARRAY') { push @matches, @{$extract}; } else { push @matches, $extract; } } if ($#matches == -1) { return undef; } elsif ($#matches == 0) { return $matches[0]; } else { return \@matches; } } sub getFields { return $_[0]->extract('simple_ident_q','table_ident','ident'); } sub fields { return $_[0]->getFields(); } sub tables { return $_[0]->getTables(); } sub getTables { my $rule = shift; my @tables_array; my %tables_hash; my $idents = $rule->extract('table_wild','table_ident','simple_ident_q'); $idents = [$idents] if ref($idents) ne 'ARRAY'; foreach my $ident (@{$idents}) { my $shrinked_ident = $ident->shrink(); my @children = $shrinked_ident->children(); my $table; if ($#children == -1) { # No children $table = $shrinked_ident; } elsif ($#children == 0) { # One child $table = $children[0]; } elsif ($#children == 2) { if ( ($shrinked_ident->name() eq 'simple_ident_q') || ($shrinked_ident->name() eq 'table_wild') ) { $table = $children[0]; # We have "database.table" } elsif ($shrinked_ident->name() eq 'table_ident') { $table = $children[2]; # We have "table.field" } else { print STDERR "Assertion: \$\#children == $#children but name() = ".$shrinked_ident->name()."\n"; return undef; } } elsif ($#children == 4) { # We have "database.table.field" $table = DBIx::MyParsePP::Rule->new( $ident->name(), @children[0..2] ); } else { print STDERR "Assertion: \$\#children == $#children\n"; return undef; } if (not exists $tables_hash{$table->toString()}) { push @tables_array, $table; $tables_hash{$table->toString()} = 1; } } return \@tables_array; } sub extract { my $rule = shift; foreach my $match (@_) { return $rule if ($rule->name() eq $match); } return $rule->extractInner(@_); } sub print { return $_[0]->toString(); } 1; __END__ =pod =head1 NAME DBIx::MyParsePP::Rule - Access individual elements from the DBIx::MyParsePP parse tree =head1 SYNOPSIS use DBIx::MyParsePP; use DBIx::MyParsePP::Rule; my $parser = DBIx::MyParsePP->new(); my $query = $parser->parse("SELECT 1"); # $query is a DBIx::MyParsePP::Rule object my $root = $query->root(); print $root->name(); # prints 'query', the top-level grammar rule my @children = $root->chilren(); # print $children[0]->name(); # prints 'verb_clause', the second-level rule print ref($chilren[1]); # prints 'DBIx::MyParsePP::Token' print $chilren[1]->type(); # prints END_OF_INPUT print [[[$root->chilren()]->[0]->chilren()]->[0]->chilren()]->[0]->name(); # Prints 'select' =head1 DESCRIPTION L uses the C grammar from the MySQL source to parse SQL strings. A parse tree is produced which contains one branch for every rule encountered during parsing. This means that very deep trees can be produced where only certain branches are important. =head1 METHODS C constructs a new rule C and C returns the name of the rule C and C return (as array) the right-side items that were matched for that rule, e.g. its "child branches" in the parse tree. C converts the parse tree back into SQL by walking the tree and gathering all tokens in sequence. =head1 EXTRACTING PARTS C can be used to walk the tree and extract relevant parts. The method returns undef if no part of the tree matched, a L or a L object if a sigle match was made, or a reference to an array of such objects if several parts matched. Names to the front of the C<@names> list are matched first. C or C can be used to obtain all fields referenced in a parse tree or a part of it. Those functions will return if no fields were referenced or a reference to an array containing C objects for each field. The Rule object can contain several 'ident' subrules if a database and/or a table name was specified for the given field. C or C can be used in the same manner to obtain all tables referenced in the parse tree. =head1 SHRINKING THE TREE The raw tree produced by L contains too many branches, many of them not containing any useful information. The C method is used to convert the tree into a more manageable form. C<$flags> can contain the following constants joined by C<|>: C if a parent and a child node are of the same type, they will be merged together. This way expressions such as C<1 + 1 + 1> or C will be converted from a nested tree with one item per branch into a single Rule containing a list of all the items and the tokens between them (e.g. C<+> or C<,>). Expressions such as C<1 + 2 * 3> will remain as a tree because multiplication and addition have different precedence. C will remove any Rules that have no children. C will remove any Rules that have just a single child, linking the child directly to the upper-level Rule. C will apply C recursively for all children. If no flags are specified, all listed transformations are applied recursively. =cut # Please note that if you use C on expressions such as "1 + 1 + 1"