# pX/Common/iterator_engine.pl - fglock # use strict; use warnings; #use Smart::Comments; for debugging, look also at Filtered-Comments.pm =pod A "rule" function gets as argument a list: 0 - a string to match 1 - an optional "continuation" 2 - an optional "flags" hashref 'capture'=>1 means 'return whatever matches' it returns (or "yields"): undef - match failed or a hash containing: state - a "continuation" or undef bool - an "assertion" (true/false) match - the "match" tree or undef tail - the string tail or undef capture - the tree of captured things abort - the match was stopped by a { return } or a fail(), and it should not backtrack or whatever Continuations are used for backtracking. A "ruleop" function gets some arguments and returns a "rule". =cut # XXX - optimization - pass the string index around, # XXX instead of copying the whole string to $tail every time # XXX - weaken self-referential things sub ruleop::alternation { # alternation is first match (not longest). though we need a # separate longest match for tokens (putter on #perl6) # update: <%var> does longest match based on the keys length() (TimToady on #perl6) # note: the list in @$nodes can be modified at runtime my $nodes = shift; # die "alternation list is empty" unless ref($nodes) eq 'ARRAY' && @$nodes; return sub { ### testing alternations on : @_, $nodes return unless @$nodes; my $tail = $_[0]; my $state = $_[1] ? [ @{$_[1]} ] : [ 0, 0 ]; my $flags = $_[2]; my $match; while ( defined $state ) { ### alternation string to match: "$tail - (node,state)=@$state" $match = $nodes->[ $state->[0] ]->( $tail, $state->[1], $flags ); ### match: $match if ( $match->{state} ) { $state->[1] = $match->{state}; } else { $state->[0]++; $state->[1] = 0; ### next alternation state - (node,state):@$state $state = undef if $state->[0] > $#$nodes; } $match->{state} = $state; return $match if $match->{bool} || $match->{abort}; } return; } } sub ruleop::concat { # note: the list in @nodes can NOT be modified at runtime # update: this is ok, because we can use <$var><$var> instead return ruleop::concat( +shift, ruleop::concat( @_ ) ) if @_ > 2; my @nodes = @_; return sub { my $tail = $_[0]; my @state = $_[1] ? ( @{$_[1]} ) : ( 0, 0 ); my $flags = $_[2]; my @matches; while (1) { $matches[0] = $nodes[0]->( $tail, $state[0], $flags ); ### 1st match: $matches[0] return $matches[0] if $matches[0]{abort}; if ( ! $matches[0]{bool} ) { return unless defined $matches[0]{state}; @state = ( $matches[0]{state}, 0 ); next; } $matches[1] = $nodes[1]->( $matches[0]{tail}, $state[1], $flags ); ### 2nd match: $matches[1] return $matches[1] if $matches[1]{abort}; if ( ! $matches[1]{bool} ) { if ( ! defined( $matches[1]{state} ) ) { return unless defined $matches[0]{state}; @state = ( $matches[0]{state}, 0 ); } ### backtracking - state: @state ### backtracking - match: @matches next; } my $succ; if ( ! defined( $matches[1]{state} ) ) { $succ = [ $matches[0]{state}, 0 ] if defined $matches[0]{state}; } else { $succ = [ $state[0], $matches[1]{state} ]; } my $capture = []; ### capture: $matches[0]{capture},$matches[1]{capture} $capture = $matches[0]{capture} if $matches[0]{capture}; push @$capture, @{$matches[1]{capture}} if $matches[1]{capture}; undef $capture unless @$capture; return { bool => 1, match => [ @matches ], tail => $matches[1]{tail}, state => $succ, capture => $capture, }; } } } sub ruleop::constant { my $const = shift; return sub { ### matching constant:$_[0],$const return if ! $_[0] || $_[0] !~ m/^(\Q$const\E)(.*)/s; return { bool => 1, match => { constant => $1 }, capture => [ $1 ], tail => $2, } if $_[2]{capture}; # flags->{capture} return { bool => 1, match => { constant => $1 }, tail => $2, } } } sub ruleop::null { return sub { return { bool => 1, match => 'null', ( $_[2]->{capture} ? ( capture => [ '' ] ) : () ), tail => $_[0], } } }; sub ruleop::capture { # sets the 'capture' flag and return a labeled capture # XXX - generalize to: set_flag('capture',1) my $label = shift; my $node = shift; sub { my @param = @_; $param[2] = {} unless defined $param[2]; $param[2] = { %{$param[2]}, capture=>1 }; my $match = $node->( @param ); return unless $match->{bool}; return if $match->{abort}; my $new_match = { %$match }; $new_match->{capture} = [ { $label => $match->{capture} } ]; return $new_match; } } =for capture At runtime, this must return _only_ the capture set inside capture_closure: xx(xx(xx( capture_closure(..) ))) One way to do it is to post-process the match: try( xx(xx(xx( abort( capture_closure(..) ) ))) ) abort() sets a 'rule_finished' flag in the returned match, that makes it return until the start of the rule, which unsets the flag before returning. - this can also be used to do fail() and assert(), and 'no-backtracking checkpoints' =cut # experimental! sub ruleop::try { my $op = shift; return sub { my $match = $op->( @_ ); ### abortable match... $match->{abort} = 0; return $match; }; }; # experimental! sub ruleop::abort { my $op = shift; return sub { my $match = $op->( @_ ); ### aborting match: $match $match->{abort} = 1; return $match; }; }; # experimental! sub ruleop::negate { my $op = shift; return sub { my $tail = $_[0]; my $match = $op->( @_ ); return if $match->{bool}; return { bool => 1, match => 'null', tail => $tail, } }; }; # experimental! =for example # adds an 'before' or 'after' sub call, which may print a debug message ruleop::wrap( { before => sub { print "matching variable: $_[0]\n" }, after => sub { $_[0]->{bool} ? print "matched\n" : print "no match\n" }, }, \&variable ) =cut sub ruleop::wrap { my $debug = shift; my $node = shift; sub { $debug->{before}( @_ ) if $debug->{before}; my $match = $node->( @_ ); $debug->{after}( $match, @_ ) if $debug->{after}; return $match; } } # ------- higher-order ruleops sub ruleop::optional { return ruleop::alternation( [ $_[0], ruleop::null() ] ); } sub ruleop::null_or_optional { return ruleop::alternation( [ ruleop::null(), $_[0] ] ); } sub ruleop::greedy_plus { my $node = shift; my $alt; $alt = ruleop::concat( $node, ruleop::optional( sub{ goto $alt } ), ); return $alt; } sub ruleop::greedy_star { my $node = shift; return ruleop::optional( ruleop::greedy_plus( $node ) ); } sub ruleop::non_greedy_star { my $node = shift; ruleop::alternation( [ ruleop::null(), ruleop::non_greedy_plus( $node ) ] ); } sub ruleop::non_greedy_plus { my $node = shift; # XXX - needs optimization for faster backtracking, less stack usage return sub { my $tail = $_[0]; my $state = $_[1] || { state => undef, op => $node }; my $flags = $_[2]; # XXX - didn't work # my $match = $state->{op}->( $tail, $state->{state}, $flags ); my $match = $state->{op}->( $tail, undef, $flags ); return unless $match->{bool}; $match->{state} = { state => $match->{state}, op => ruleop::concat( $node, $state->{op} ), }; return $match; } } 1;