package YAPE::Regex::Element;

use strict;

our $VERSION = '4.00';

# Fallback methods shared by the regex node classes.  Nodes are blessed
# hashes; the keys read here are TEXT, QUANT and NGREED.  No @ISA is set
# anywhere in this file -- the SYNOPSIS indicates that YAPE::Regex arranges
# the inheritance.

# Private helper: map undef to ''.  Nodes are routinely built without a
# quantifier or non-greedy flag (e.g. macro->new('s')), which stores undef;
# going through this helper keeps the accessors warning-free under -w and
# makes ngreed() return an empty string rather than undef in that case.
sub _or_empty { defined $_[0] ? $_[0] : '' }

# Text of the node itself; the empty string when the node has no TEXT key.
sub text {
    my ($self) = @_;
    return exists $self->{TEXT} ? $self->{TEXT} : "";
}

# The node's text followed by its quantifier and non-greedy flag.
sub string {
    my ($self) = @_;
    return $self->text
         . _or_empty($self->{QUANT})
         . _or_empty($self->{NGREED});
}

# Fallback: a node without children renders the same as string().
sub fullstring {
    my ($self) = @_;
    return $self->string;
}

# Quantifier plus non-greedy flag, e.g. '{2,}?'; '' when neither is set.
sub quant {
    my ($self) = @_;
    return _or_empty($self->{QUANT}) . _or_empty($self->{NGREED});
}

# The non-greedy flag, or '' when the node has none.
sub ngreed {
    my ($self) = @_;
    return _or_empty($self->{NGREED});
}


package YAPE::Regex::anchor;

# new($anchor, $quant, $ngreed): the anchor is stored verbatim as TEXT.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub type { 'anchor' }


package YAPE::Regex::macro;

# new($letter, $quant, $ngreed): TEXT holds the macro letter without its
# backslash; text() puts the backslash back.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\\' . $self->{TEXT};
}

sub type { 'macro' }


package YAPE::Regex::oct;

# new($digits, $quant, $ngreed): TEXT holds the octal digits.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\\' . $self->{TEXT};
}

sub type { 'oct' }


package YAPE::Regex::hex;

# new($digits, $quant, $ngreed): TEXT holds the hex digits; text() renders
# them as \xDIGITS.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\x' . $self->{TEXT};
}

sub type { 'hex' }


package YAPE::Regex::backref;

# new($number, $quant, $ngreed): TEXT holds the back-reference number.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\\' . $self->{TEXT};
}

sub type { 'backref' }


package YAPE::Regex::ctrl;

# new($char, $quant, $ngreed): TEXT holds the control letter; text()
# renders it as \cX.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\c' . $self->{TEXT};
}

sub type { 'ctrl' }


package YAPE::Regex::named;

# new($name, $quant, $ngreed): TEXT holds the character name; text()
# renders it as \N{NAME}.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\N{' . $self->{TEXT} . '}';
}

sub type { 'named' }


package YAPE::Regex::Cchar;

# new($quant, $ngreed): no TEXT is stored; text() is always the literal \C.
sub new {
    my ($class, $q, $ng) = @_;
    return bless { QUANT => $q, NGREED => $ng }, $class;
}

sub text { '\C' }

sub type { 'Cchar' }


package YAPE::Regex::slash;

# new($char, $quant, $ngreed): TEXT holds the character that follows the
# backslash.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub text {
    my ($self) = @_;
    return '\\' . $self->{TEXT};
}

sub type { 'slash' }


package YAPE::Regex::any;

# new($quant, $ngreed): TEXT is fixed to the dot metacharacter.
sub new {
    my ($class, $q, $ng) = @_;
    return bless { TEXT => '.', QUANT => $q, NGREED => $ng }, $class;
}

sub type { 'any' }

package YAPE::Regex::class;

# Character classes.  NEG is '^' (or empty) for a bracketed class, or the
# letter p/P for a property class, in which case TEXT is the property name.
sub new {
    my ($class, $match, $neg, $q, $ng) = @_;
    return bless {
        TEXT   => $match,
        NEG    => $neg,
        QUANT  => $q,
        NGREED => $ng,
    }, $class;
}

# Renders \p{TEXT} / \P{TEXT} for property classes, [NEG TEXT] otherwise.
sub text {
    my ($self) = @_;
    # NEG is undef when the class was built without a negation argument;
    # treat that as "not negated" instead of matching undef against a regex.
    my $neg = defined $self->{NEG} ? $self->{NEG} : '';
    return $neg =~ /[pP]/
        ? "\\$neg\{$self->{TEXT}\}"
        : "[$neg$self->{TEXT}]";
}

sub type { 'class' }


package YAPE::Regex::text;

# Literal text.
sub new {
    my ($class, $match, $q, $ng) = @_;
    return bless { TEXT => $match, QUANT => $q, NGREED => $ng }, $class;
}

sub type { 'text' }


package YAPE::Regex::alt;

# Alternation; carries no text and never a quantifier.
sub new { bless { NGREED => "", QUANT => "" }, $_[0] }

sub text { '' }

sub string { '|' }

sub type { 'alt' }


package YAPE::Regex::comment;

# new($text, $x): $x is true when the comment is a "# ..." comment rather
# than a (?#...) one.
sub new {
    my ($class, $text, $X) = @_;
    return bless { TEXT => $text, XCOMM => $X }, $class;
}

sub string {
    my ($self) = @_;
    return $self->{XCOMM} ? " # $self->{TEXT}" : "(?#$self->{TEXT})";
}

sub xcomm { $_[0]{XCOMM} }

sub type { 'comment' }


package YAPE::Regex::whitespace;

# A run of whitespace, stored verbatim.
sub new {
    my ($class, $text) = @_;
    return bless { TEXT => $text }, $class;
}

sub type { 'whitespace' }

sub string { $_[0]{TEXT} }


package YAPE::Regex::flags;

# new($add, $sub): $add lists the modes switched on, $sub the modes switched
# off.  MODE is the set of modes in $add that are not also in $sub; ON and
# OFF are the two strings with their letters sorted.
sub new {
    my ($class, $add, $sub) = @_;
    $add ||= "";
    $sub ||= "";

    my %mode = map { $_ => 1 } split //, $add;
    delete @mode{ split //, $sub };

    my $on  = join "", sort split //, $add;
    my $off = join "", sort split //, $sub;

    return bless { MODE => \%mode, ON => $on, OFF => $off }, $class;
}

# "(?ON)" or "(?ON-OFF)".
sub string {
    my ($self) = @_;
    return "(?$self->{ON}" . ($self->{OFF} && "-$self->{OFF}") . ')';
}

sub type { 'flags' }


package YAPE::Regex::cut;

# new(\@nodes, $quant, $ngreed): the (?>...) construct.
sub new {
    my ($class, $nodes, $q, $ng) = @_;
    return bless {
        CONTENT => $nodes || [],
        QUANT   => $q     || "",
        NGREED  => $ng    || "",
    }, $class;
}

# Opening token, every child rendered in order, then ')' and the quantifier.
sub fullstring {
    my ($self) = @_;
    return join "",
        $self->string,
        map($_->fullstring, @{ $self->{CONTENT} }),
        ")$self->{QUANT}$self->{NGREED}";
}

sub string { '(?>' }

sub type { 'cut' }


package YAPE::Regex::lookahead;

# new($pos, \@nodes): $pos is true for (?=...), false for (?!...).
sub new {
    my ($class, $pos, $nodes) = @_;
    return bless { POS => $pos, CONTENT => $nodes || [] }, $class;
}

sub fullstring {
    my ($self) = @_;
    return join "",
        $self->string,
        map($_->fullstring, @{ $self->{CONTENT} }),
        ')';
}

# POS is documented as a boolean, so test its truth instead of using it as
# a list index: any true value (not just 1) must select '='.
sub string {
    my ($self) = @_;
    return '(?' . ($self->{POS} ? '=' : '!');
}

sub type { 'lookahead' }

sub pos { $_[0]{POS} }


package YAPE::Regex::lookbehind;

# new($pos, \@nodes): $pos is true for (?<=...), false for (?<!...).
sub new {
    my ($class, $pos, $nodes) = @_;
    return bless { POS => $pos, CONTENT => $nodes || [] }, $class;
}

sub fullstring {
    my ($self) = @_;
    return join "",
        $self->string,
        map($_->fullstring, @{ $self->{CONTENT} }),
        ')';
}

# Same truth test as lookahead: any true POS means a positive assertion.
sub string {
    my ($self) = @_;
    return '(?<' . ($self->{POS} ? '=' : '!');
}

sub type { 'lookbehind' }

sub pos { $_[0]{POS} }


package YAPE::Regex::conditional;

# new($cond, \@true, \@false, $quant, $ngreed)
#
# CONTENT is the condition: either a plain value rendered as "(VALUE)", or
# an array reference whose first element is a node rendered with its own
# fullstring.  OPTS is the number of branches to render; fullstring() emits
# "|FALSE" only when OPTS is 2.
sub new {
    my ($class, $cond, $true, $false, $q, $ng) = @_;
    $false ||= [];

    # A non-empty false branch handed to the constructor must be rendered,
    # so start with two branches in that case.  With an empty false branch
    # OPTS starts at 1, as before.
    my $branches = (ref($false) eq 'ARRAY' && @$false) ? 2 : 1;

    return bless {
        OPTS    => $branches,
        CONTENT => $cond || [],
        TRUE    => $true || [],
        FALSE   => $false,
        QUANT   => $q    || "",
        NGREED  => $ng   || "",
    }, $class;
}

sub fullstring {
    my ($self) = @_;
    my @false_branch = $self->{OPTS} == 2
        ? ('|', map($_->fullstring, @{ $self->{FALSE} }))
        : ();
    return join "",
        $self->string,
        map($_->fullstring, @{ $self->{TRUE} }),
        @false_branch,
        ")$self->{QUANT}$self->{NGREED}";
}

sub string {
    my ($self) = @_;
    my $cond = ref $self->{CONTENT}
        ? $self->{CONTENT}[0]->fullstring
        : "($self->{CONTENT})";
    return '(?' . $cond;
}

sub backref { $_[0]{CONTENT} }

sub type { 'cond' }


package YAPE::Regex::group;

# new($on, $off, \@nodes, $quant, $ngreed): a non-capturing group; the mode
# letters are stored sorted.
sub new {
    my ($class, $on, $off, $nodes, $q, $ng) = @_;
    return bless {
        ON      => join("", sort split //, $on  || ""),
        OFF     => join("", sort split //, $off || ""),
        CONTENT => $nodes || [],
        QUANT   => $q     || "",
        NGREED  => $ng    || "",
    }, $class;
}

sub fullstring {
    my ($self) = @_;
    return join "",
        $self->string,
        map($_->fullstring, @{ $self->{CONTENT} }),
        ")$self->{QUANT}$self->{NGREED}";
}

# "(?ON-OFF:" when any mode is switched off, "(?ON:" otherwise.
sub string {
    my ($self) = @_;
    return $self->{OFF}
        ? "(?$self->{ON}-$self->{OFF}:"
        : "(?$self->{ON}:";
}

sub type { 'group' }


package YAPE::Regex::capture;

# new(\@nodes, $quant, $ngreed): a capturing group.
sub new {
    my ($class, $nodes, $q, $ng) = @_;
    return bless {
        CONTENT => $nodes || [],
        QUANT   => $q     || "",
        NGREED  => $ng    || "",
    }, $class;
}

sub fullstring {
    my ($self) = @_;
    return join "",
        $self->string,
        map($_->fullstring, @{ $self->{CONTENT} }),
        ")$self->{QUANT}$self->{NGREED}";
}

sub string { '(' }

sub type { 'capture' }


package YAPE::Regex::code;

# new($block): CONTENT is the code block as a string, braces included.
sub new {
    my ($class, $block) = @_;
    return bless { CONTENT => $block, QUANT => "", NGREED => "" }, $class;
}

sub text {
    my ($self) = @_;
    return "(?$self->{CONTENT})";
}

sub type { 'code' }


package YAPE::Regex::later;

# new($block): like code, but rendered with the (??...) prefix.
sub new {
    my ($class, $block) = @_;
    return bless { CONTENT => $block, QUANT => "", NGREED => "" }, $class;
}

sub text {
    my ($self) = @_;
    return "(??$self->{CONTENT})";
}

sub type { 'later' }


package YAPE::Regex::close;

# new($quant, $ngreed): a closing parenthesis and whatever quantifies it.
sub new {
    my ($class, $q, $ng) = @_;
    return bless { QUANT => $q || "", NGREED => $ng || "" }, $class;
}

sub string {
    my ($self) = @_;
    return ")$self->{QUANT}$self->{NGREED}";
}

sub type { 'close' }


1;

=head1 NAME

YAPE::Regex::Element - sub-classes for YAPE::Regex elements

=head1 VERSION

This document refers to YAPE::Regex::Element version 4.00.

=head1 SYNOPSIS

  use YAPE::Regex 'MyExt::Mod';
  # this sets up inheritance in MyExt::Mod
  # see YAPE::Regex documentation

=head1 C<YAPE> MODULES

The C<YAPE> hierarchy of modules is an attempt at a unified means of parsing
and extracting content.  It attempts to maintain a generic interface, to
promote simplicity and reusability.  The API is powerful, yet simple.  The
modules do tokenization (which can be intercepted) and build trees, so that
extraction of specific nodes is doable.

=head1 DESCRIPTION

This module provides the classes for the C<YAPE::Regex> objects.  The base
class for these objects is C<YAPE::Regex::Element>.  The object classes are
numerous.

=head2 Methods for C<YAPE::Regex::Element>

This class contains fallback methods for the other classes.

=over 4

=item * C<< $obj->text; >>

Returns a string representation of the content of the regex node I<itself>,
not any nodes contained in it.  This is an empty string for nodes that carry
no text.

=item * C<< $obj->string; >>

Returns a string representation of the regex node I<itself>, not any nodes
contained in it.

=item * C<< $obj->fullstring; >>

Returns a string representation of the regex node, including any nodes
contained in it.

=item * C<< $obj->quant; >>

Returns a string with the quantity, and a C<?> if the node is non-greedy.  The
quantity is one of C<*>, C<+>, C<?>, C<{I<M>,I<N>}>, or an empty string.

=item * C<< $obj->ngreed; >>

Returns a C<?> if the node is non-greedy, and an empty string otherwise.

=back

=head2 Methods for C<YAPE::Regex::anchor>

This class represents anchors.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::anchor->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::anchor> object.  Takes three arguments:  the anchor
(C<^>, C<\A>, C<$>, C<\Z>, C<\z>, C<\B>, C<\b>, or C<\G>), the quantity, and
the non-greedy flag.  The quantity I<should> be an empty string.

  my $anc = YAPE::Regex::anchor->new('\A', '', '?');
  # /\A?/

=item * C<< $anchor->type; >>

Returns the string C<anchor>.

=back

=head2 Methods for C<YAPE::Regex::macro>

This class represents character-class macros.  Objects have the following
methods:

=over 4

=item * C<< YAPE::Regex::macro->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::macro> object.  Takes three arguments:  the macro
(C<w>, C<W>, C<d>, C<D>, C<s>, or C<S>), the quantity, and the non-greedy
flag.

  my $macro = YAPE::Regex::macro->new('s', '{3,5}');
  # /\s{3,5}/

=item * C<< $macro->text; >>

Returns the macro.

  print $macro->text;  # '\s'

=item * C<< $macro->type; >>

Returns the string C<macro>.

=back

=head2 Methods for C<YAPE::Regex::oct>

This class represents octal escapes.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::oct->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::oct> object.  Takes three arguments:  the octal
number (as a string), the quantity, and the non-greedy flag.

  my $oct = YAPE::Regex::oct->new('040');
  # /\040/

=item * C<< $oct->text; >>

Returns the octal escape.

  print $oct->text;  # '\040'

=item * C<< $oct->type; >>

Returns the string C<oct>.

=back

=head2 Methods for C<YAPE::Regex::hex>

This class represents hexadecimal escapes.  Objects have the following
methods:

=over 4

=item * C<< YAPE::Regex::hex->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::hex> object.  Takes three arguments:  the hexadecimal
number (as a string), the quantity, and the non-greedy flag.

  my $hex = YAPE::Regex::hex->new('20','{2,}');
  # /\x20{2,}/

=item * C<< $hex->text; >>

Returns the hexadecimal escape.

  print $hex->text;  # '\x20'

=item * C<< $hex->type; >>

Returns the string C<hex>.

=back

=head2 Methods for C<YAPE::Regex::utf8hex>

This class represents UTF hexadecimal escapes.  Objects have the following
methods:

=over 4

=item * C<< YAPE::Regex::utf8hex->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::utf8hex> object.  Takes three arguments:  the
hexadecimal number (as a string), the quantity, and the non-greedy flag.

  my $utf8hex = YAPE::Regex::utf8hex->new('beef','{0,4}');
  # /\x{beef}{0,4}/

=item * C<< $utf8hex->text; >>

Returns the hexadecimal escape.

  print $utf8hex->text;  # '\x{beef}'

=item * C<< $utf8hex->type; >>

Returns the string C<utf8hex>.

=back

=head2 Methods for C<YAPE::Regex::backref>

This class represents back-references.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::backref->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::backref> object.  Takes three arguments:  the number
of the back-reference, the quantity, and the non-greedy flag.

  my $bref = YAPE::Regex::backref->new(2,'','?');
  # /\2?/

=item * C<< $bref->text; >>

Returns the back-reference escape.

  print $bref->text;  # '\2'

=item * C<< $bref->type; >>

Returns the string C<backref>.

=back

=head2 Methods for C<YAPE::Regex::ctrl>

This class represents control character escapes.  Objects have the following
methods:

=over 4

=item * C<< YAPE::Regex::ctrl->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::ctrl> object.  Takes three arguments:  the control
character, the quantity, and the non-greedy flag.

  my $ctrl = YAPE::Regex::ctrl->new('M');
  # /\cM/

=item * C<< $ctrl->text; >>

Returns the control character escape.

  print $ctrl->text;  # '\cM'

=item * C<< $ctrl->type; >>

Returns the string C<ctrl>.

=back

=head2 Methods for C<YAPE::Regex::named>

This class represents named characters.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::named->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::named> object.  Takes three arguments:  the name of
the character, the quantity, and the non-greedy flag.

  my $named = YAPE::Regex::named->new('GREEK SMALL LETTER BETA');
  # /\N{GREEK SMALL LETTER BETA}/

=item * C<< $named->text; >>

Returns the character escape text.

  print $named->text;  # '\N{GREEK SMALL LETTER BETA}'

=item * C<< $named->type; >>

Returns the string C<named>.

=back

=head2 Methods for C<YAPE::Regex::Cchar>

This class represents C<\C> characters.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::Cchar->new($q,$ng); >>

Creates a C<YAPE::Regex::Cchar> object.
Takes two arguments:  the quantity and the non-greedy flag.

  my $Cchar = YAPE::Regex::Cchar->new('{2}');
  # /\C{2}/

=item * C<< $Cchar->text; >>

Returns the escape sequence.

  print $Cchar->text;  # '\C'

=item * C<< $Cchar->type; >>

Returns the string C<Cchar>.

=back

=head2 Methods for C<YAPE::Regex::slash>

This class represents any other escaped characters.  Objects have the
following methods:

=over 4

=item * C<< YAPE::Regex::slash->new($type,$q,$ng); >>

Creates a C<YAPE::Regex::slash> object.  Takes three arguments:  the
backslashed character, the quantity, and the non-greedy flag.

  my $slash = YAPE::Regex::slash->new('t','','?');
  # /\t?/

=item * C<< $slash->text; >>

Returns the escaped character.

  print $slash->text;  # '\t'

=item * C<< $slash->type; >>

Returns the string C<slash>.

=back

=head2 Methods for C<YAPE::Regex::any>

This class represents the dot metacharacter.  Objects have the following
methods:

=over 4

=item * C<< YAPE::Regex::any->new($q,$ng); >>

Creates a C<YAPE::Regex::any> object.  Takes two arguments:  the quantity, and
the non-greedy flag.

  my $any = YAPE::Regex::any->new('{1,3}');
  # /.{1,3}/

=item * C<< $any->type; >>

Returns the string C<any>.

=back

=head2 Methods for C<YAPE::Regex::class>

This class represents character classes.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::class->new($chars,$neg,$q,$ng); >>

Creates a C<YAPE::Regex::class> object.  Takes four arguments:  the characters
in the class, a C<^> if the class is negated (an empty string otherwise), the
quantity, and the non-greedy flag.

  my $class = YAPE::Regex::class->new('aeiouy','^');
  # /[^aeiouy]/

=item * C<< $class->text; >>

Returns the character class.

  print $class->text;  # [^aeiouy]

=item * C<< $class->type; >>

Returns the string C<class>.

=back

=head2 Methods for C<YAPE::Regex::text>

This class represents plain text.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::text->new($text,$q,$ng); >>

Creates a C<YAPE::Regex::text> object.  Takes three arguments:  the text, the
quantity, and the non-greedy flag.  The quantity and non-greedy modifier
should only be present for I<single-character> text, because of the way the
parser renders the quantity and non-greedy modifier.

  my $text = YAPE::Regex::text->new('alphabet','');
  # /alphabet/

  my $text = YAPE::Regex::text->new('x','?','?');
  # /x??/

=item * C<< $text->type; >>

Returns the string C<text>.

=back

=head2 Methods for C<YAPE::Regex::alt>

This class represents alternation.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::alt->new; >>

Creates a C<YAPE::Regex::alt> object.

  my $alt = YAPE::Regex::alt->new;
  # /|/

=item * C<< $alt->type; >>

Returns the string C<alt>.

=back

=head2 Methods for C<YAPE::Regex::comment>

This class represents in-line comments.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::comment->new($comment,$x); >>

Creates a C<YAPE::Regex::comment> object.  Takes two arguments:  the text of
the comment, and whether or not the C</x> regex modifier is in effect for this
comment.  Note that Perl's regex engine will stop a C<(?#...)> comment at the
first C<)>, regardless of what you do.

  my $comment = YAPE::Regex::comment->new(
    "match an optional string of digits"
  );
  # /(?#match an optional string of digits)/

  my $comment = YAPE::Regex::comment->new(
    "match an optional string of digits",
    1
  );
  # /# match an optional string of digits/

=item * C<< $comment->type; >>

Returns the string C<comment>.

=item * C<< $comment->xcomm; >>

Returns true or false, depending on whether the comment is under the C</x>
regex modifier.

=back

=head2 Methods for C<YAPE::Regex::whitespace>

This class represents whitespace under the C</x> regex modifier.  Objects have
the following methods:

=over 4

=item * C<< YAPE::Regex::whitespace->new($text); >>

Creates a C<YAPE::Regex::whitespace> object.  Takes one argument:  the text of
the whitespace.

  my $ws = YAPE::Regex::whitespace->new(' ');
  # / /x

=item * C<< $ws->text; >>

Returns the whitespace.

  print $ws->text;  # ' '

=item * C<< $ws->type; >>

Returns the string C<whitespace>.

=back

=head2 Methods for C<YAPE::Regex::flags>

This class represents C<(?ismx)> flags.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::flags->new($add,$sub); >>

Creates a C<YAPE::Regex::flags> object.  Takes two arguments:  a string of the
modes to have on, and a string of the modes to explicitly turn off.  The flags
are displayed in alphabetical order.

  my $flags = YAPE::Regex::flags->new('is','m');
  # /(?is-m)/

=item * C<< $flags->type; >>

Returns the string C<flags>.

=back

=head2 Methods for C<YAPE::Regex::cut>

This class represents the cut assertion.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::cut->new(\@nodes); >>

Creates a C<YAPE::Regex::cut> object.
Takes one argument:  a reference to an array of objects to be contained in the
cut.

  my $REx = YAPE::Regex::class->new('aeiouy','','+');
  my $cut = YAPE::Regex::cut->new([$REx]);
  # /(?>[aeiouy]+)/

=item * C<< $cut->type; >>

Returns the string C<cut>.

=back

=head2 Methods for C<YAPE::Regex::lookahead>

This class represents lookaheads.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::lookahead->new($pos,\@nodes); >>

Creates a C<YAPE::Regex::lookahead> object.  Takes two arguments:  a boolean
value indicating whether or not the lookahead is positive, and a reference to
an array of objects to be contained in the lookahead.

  my $REx = YAPE::Regex::class->new('aeiouy');
  my $look = YAPE::Regex::lookahead->new(0,[$REx]);
  # /(?![aeiouy])/

=item * C<< $look->pos; >>

Returns true if the lookahead is positive.

  print $look->pos ? 'pos' : 'neg';  # 'neg'

=item * C<< $look->type; >>

Returns the string C<lookahead>.

=back

=head2 Methods for C<YAPE::Regex::lookbehind>

This class represents lookbehinds.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::lookbehind->new($pos,\@nodes); >>

Creates a C<YAPE::Regex::lookbehind> object.  Takes two arguments:  a boolean
value indicating whether or not the lookbehind is positive, and a reference to
an array of objects to be contained in the lookbehind.

  my $REx = YAPE::Regex::class->new('aeiouy','^');
  my $look = YAPE::Regex::lookbehind->new(1,[$REx]);
  # /(?<=[^aeiouy])/

=item * C<< $look->pos; >>

Returns true if the lookbehind is positive.

  print $look->pos ? 'pos' : 'neg';  # 'pos'

=item * C<< $look->type; >>

Returns the string C<lookbehind>.

=back

=head2 Methods for C<YAPE::Regex::conditional>

This class represents conditionals.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::conditional->new($br,$t,$f,$q,$ng); >>

Creates a C<YAPE::Regex::conditional> object.  Takes five arguments:  the
number of the back-reference (that's all that's supported in the current
version), an array reference to the "true" pattern, an array reference to the
"false" pattern, and the quantity and non-greedy flag.

  my $cond = YAPE::Regex::conditional->new(
    2,
    [],
    [ YAPE::Regex::text->new('foo') ],
    '?',
  );
  # /(?(2)|foo)?/

=item * C<< $cond->backref; >>

Returns the number of the back-reference the conditional depends on.

  print $cond->backref;  # 2

=item * C<< $cond->type; >>

Returns the string C<cond>.

=back

=head2 Methods for C<YAPE::Regex::group>

This class represents non-capturing groups.  Objects have the following
methods:

=over 4

=item * C<< YAPE::Regex::group->new($on,$off,\@nodes,$q,$ng); >>

Creates a C<YAPE::Regex::group> object.  Takes five arguments:  the modes
turned on, the modes explicitly turned off, a reference to an array of objects
in the group, the quantity, and the non-greedy flag.  The modes are displayed
in alphabetical order.

  my $group = YAPE::Regex::group->new(
    'i',
    's',
    [
      YAPE::Regex::macro->new('d', '{2}'),
      YAPE::Regex::macro->new('s'),
      YAPE::Regex::macro->new('d', '{2}'),
    ],
    '?',
  );
  # /(?i-s:\d{2}\s\d{2})?/

=item * C<< $group->type; >>

Returns the string C<group>.

=back

=head2 Methods for C<YAPE::Regex::capture>

This class represents capturing groups.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::capture->new(\@nodes,$q,$ng); >>

Creates a C<YAPE::Regex::capture> object.  Takes three arguments:  a reference
to an array of objects in the group, the quantity, and the non-greedy flag.

  my $capture = YAPE::Regex::capture->new(
    [
      YAPE::Regex::macro->new('d', '{2}'),
      YAPE::Regex::macro->new('s'),
      YAPE::Regex::macro->new('d', '{2}'),
    ],
  );
  # /(\d{2}\s\d{2})/

=item * C<< $capture->type; >>

Returns the string C<capture>.

=back

=head2 Methods for C<YAPE::Regex::code>

This class represents code blocks.  Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::code->new($block); >>

Creates a C<YAPE::Regex::code> object.  Takes one argument:  a string holding
a block of code.

  my $code = YAPE::Regex::code->new(q({ push @poss, $1 }));
  # /(?{ push @poss, $1 })/

=item * C<< $code->type; >>

Returns the string C<code>.

=back

=head2 Methods for C<YAPE::Regex::later>

This class represents delayed regexes, written C<(??{ ... })>.  Objects have
the following methods:

=over 4

=item * C<< YAPE::Regex::later->new($block); >>

Creates a C<YAPE::Regex::later> object.  Takes one argument:  a string holding
a block of code.

  my $later = YAPE::Regex::later->new(q({ push @poss, $1 }));
  # /(??{ push @poss, $1 })/

=item * C<< $later->type; >>

Returns the string C<later>.

=back

=head2 Methods for C<YAPE::Regex::close>

This class represents closed parentheses.
Objects have the following methods:

=over 4

=item * C<< YAPE::Regex::close->new($q,$ng); >>

Creates a C<YAPE::Regex::close> object.  Takes two arguments:  the quantity,
and the non-greedy flag.  These objects are never needed in the tree; however,
they are returned in the parsing stage, so that you know when they've been
reached.

  my $close = YAPE::Regex::close->new('?','?');
  # /)??/

=item * C<< $close->type; >>

Returns the string C<close>.

=back

=head1 TO DO

This is a listing of things to add to future versions of this module.

=over 4

=item * None!

=back

=head1 BUGS

Following is a list of known or reported bugs.

=over 4

=item * This documentation might be incomplete.

=back

=head1 SUPPORT

Visit C<YAPE>'s web site at F<http://www.pobox.com/~japhy/YAPE/>.

=head1 SEE ALSO

The C<YAPE::Regex> documentation, for information on the main class.

=head1 AUTHOR

The original author is Jeff "japhy" Pinyan (CPAN ID: PINYAN).

Gene Sullivan (gsullivan@cpan.org) is a co-maintainer.

=head1 LICENSE

This module is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.  See L<perlartistic>.

=cut