package Algorithm::AhoCorasick;
use warnings;
use strict;
use Algorithm::AhoCorasick::SearchMachine;
require Exporter;
# Inherit import() from Exporter so callers can request functions by name.
our @ISA = qw(Exporter);
# The ':all' tag groups every function of the procedural interface.
our %EXPORT_TAGS = ( 'all' => [ qw(
find_first
find_all
) ] );
# Only @EXPORT_OK is populated here, so the functions are available on
# request (by name or through the ':all' tag) rather than by default.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
# Keep in sync with the "Version" line in the POD below.
our $VERSION = '0.03';
# find_first($text, @keywords)
#
# Builds a search machine from @keywords and feeds it $text with a
# callback that wraps its arguments in an array reference.  Whatever
# feed() hands back is treated as that reference (a hit) or as a false
# value (no hit).
# NOTE(review): presumably feed() stops at the first true callback result
# and returns it, and the callback receives (position, keyword) as in
# find_all - confirm against Algorithm::AhoCorasick::SearchMachine.
#
# Returns, on a hit, the captured callback arguments as a list (list
# context) or as an array reference (scalar context); without a hit, an
# empty list (list context) or undef (scalar context).
sub find_first {
    my ($text, @keywords) = @_;

    my $machine = Algorithm::AhoCorasick::SearchMachine->new(@keywords);
    my $hit = $machine->feed($text, sub { return [ @_ ] });

    # No match: hand back "nothing" in the form the caller's context expects.
    unless ($hit) {
        return wantarray ? () : undef;
    }

    return wantarray ? @$hit : $hit;
}
# find_all($text, @keywords)
#
# Builds a search machine from @keywords, feeds it $text and collects
# every (position, keyword) pair reported through the callback.
#
# Returns a reference to a hash mapping each reported position to an
# array reference of the keywords reported there (in callback order), or
# undef when the callback was never invoked.
sub find_all {
    my ($text, @keywords) = @_;

    my $machine = Algorithm::AhoCorasick::SearchMachine->new(@keywords);

    my %keywords_at;
    my $collect = sub {
        my ($pos, $keyword) = @_;

        # Autovivification creates the per-position list on first use.
        push @{ $keywords_at{$pos} }, $keyword;

        # Deliberately a false value.
        # NOTE(review): presumably this tells feed() to keep scanning
        # instead of stopping at this match - confirm in SearchMachine.
        return undef;
    };
    $machine->feed($text, $collect);

    return \%keywords_at if %keywords_at;

    # Explicit undef (not a bare return) keeps the hashref-or-undef contract.
    return undef;
}
1;
__END__
=head1 NAME
Algorithm::AhoCorasick - efficient search for multiple strings
=head1 VERSION
Version 0.03
=head1 SYNOPSIS
use Algorithm::AhoCorasick qw(find_all);
$found = find_all($text, @keywords);
if (!$found) {
print "no keywords found\n";
} else {
foreach $pos (sort { $a <=> $b } keys %$found) {
$keywords = join ', ', @{$found->{$pos}};
print "$pos: $keywords\n";
}
}
=head1 DESCRIPTION
Aho-Corasick is a classic (1975) algorithm for locating elements of a
finite set of strings within an input text. It constructs a finite
state machine from a list of keywords, then uses the machine to locate
all occurrences of the keywords. Construction of the machine takes
time proportional to the sum of the lengths of the keywords and the
machine processes the input string in a single pass - that is, the
algorithm may be considerably more efficient than searching for each
keyword separately.
=head1 PROCEDURAL INTERFACE
The module exports 2 functions for the common use cases: C<find_all>
for finding all matches, and C<find_first> for finding whether a match
exists at all. Note that both functions must be explicitly imported
(i.e. with C