The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
$VERSION = '0.01';

use strict;

package Data::Trie;

#creates a new Trie node and initializes its value and daughters to zero
sub new {
	my $self = {};
	my $class = shift;
	#does this node terminate a word?
	$self->{value} = 0;
	#is this node terminate a prefix of further words?
	$self->{daughters} = {};
	bless $self, $class;
}

#returns all words in the trie
sub getAll {
	my $self = shift;
	#keeps track of the path in the trie up to this point
	my $path = "";
	#calls a recursive routine to check the path
	return $self->_getAllRecurse($path);
}

#recursive routine to collect all words, called by getAll()
#DON'T CALL THIS DIRECTLY; use getAll() instead
sub _getAllRecurse {
	my $self = shift;
	my $path = shift;
	my $daughters = $self->{daughters};
	#the set of words to return
	my @result = ();
	#return the current path if the current node terminates a word
	if ($self->{value}) {
		push @result, $path;
	}
	my @keys = keys %$daughters;
	#check all daughter nodes recursively adding their results to current ones
	foreach my $letter (@keys) {
		my $newpath = $path . $letter;
		my @letterresult = $daughters->{$letter}->_getAllRecurse($newpath);
		push @result, @letterresult;
	}
	return @result;
}

#adds a word to the tree by recursively checking each letter of the word and
#adding nodes as needed.
sub add {
	my $self = shift;
	my $str = shift;
	#data can be added or not
	my $data = shift;
	#separates first letter from the rest
	my $first = substr $str, 0, 1;
	my $rest = substr $str, 1;
	my $daughters = $self->{daughters};
	#checks if there is a node for the first letter
	if (not exists $daughters->{$first}) {
		#adds a node if necessary
		$daughters->{$first} = Data::Trie->new;
	}
	my $daughter = $daughters->{$first};
	#is the word only one letter long?
	if (length $rest > 0) {
		#recurse on the remaining letters
		$daughter->add($rest, $data);
	} else {
		#set the value to 1 and store the data
		$daughter->{value} = 1;
		$daughter->{data} = $data;
	}
	return 1;
}

#removes a word from the trie (does NOT prune nodes)
sub remove {
	my $self = shift;
	my $str = shift;
	#splits the word into first letter and rest
	my $first = substr $str, 0, 1;
	my $rest = substr $str, 1;
	my $daughters = $self->{daughters};
	if (exists $daughters->{$first}) {
		my $daughter = $daughters->{$first};
		if (length $rest == 0) {
			$daughter->{value} = 0;
		} else {
			$daughter->remove($rest);
		}
	}
	return $str;
}

#looks up a word in the trie
sub lookup {
	my $self = shift;
	my $str = shift;
	#splits the word into first letter and rest
	my $first = substr $str, 0, 1;
	my $rest = substr $str, 1;
	my $daughters = $self->{daughters};
	#checks if the first letter matches a daughter
	if (not exists $daughters->{$first}) {
		#if not, lookup fails
		return 0;
	#if it does match, recurse on remaining letters
	} elsif (length $rest == 0) {
		return ($daughters->{$first}->{value}, $daughters->{$first}->{data});
	}	else {
		return $daughters->{$first}->lookup($rest);
	}
}

1;

=head1 NAME

Data::Trie - An implementation of a letter trie

=head1 SYNOPSIS

	use Data::Trie;
	$t = Data::Trie->new;
	$t->add('orange', 'kind of fruit');
	($result, $data) = $t->lookup->('orange');
	$t->remove('orange');
	$t->getAll;

=head1 DESCRIPTION

This module implements a letter trie data structure. This is a linked set of
nodes representing a set of words. Starting from the root, each letter of an
included word is a daughter node of the trie. Hence, if a word is in the trie,
there will be a path from root to leaf for that word. If a word is not in the
trie, there will be no such path.

This structure allows for a relatively compact representation of a set of words.
This particular implementation allows each word to be stored alone or with some
associated data item.

Note that the C<remove()> method does I<not> prune nodes and thus a C<Trie> can
only grow in size.

=head1 COMPARE

This implementation differs from L<Tree::Trie> in that C<lookup()> checks for a
match, rather than checking for whether the current string is a prefix.

=head1 VERSION

0.01

=head1 AUTHOR

Michael Hammond, I<hammond@u.arizona.edu>

=cut