package Search::Mousse; use strict; our $VERSION = '0.32'; use base qw(Class::Accessor::Chained::Fast); __PACKAGE__->mk_accessors( qw(directory name stemmer key_to_id id_to_key id_to_value word_to_id id_to_related and ) ); use CDB_File; use CDB_File_Thawed; use List::Uniq qw(uniq); use Path::Class; use Search::QueryParser; use Set::Scalar; sub new { my $class = shift; my $self = {}; bless $self, $class; my %args = @_; $self->directory($args{directory}); $self->name($args{name}); $self->stemmer( $args{stemmer} || sub { my $words = lc shift; return uniq(split / /, $words); } ); $self->and($args{and} || 0); $self->_init; return $self; } sub _init { my ($self) = @_; my $name = $self->name; my $dir = $self->directory; my $filename = file($dir, "${name}_key_to_id.cdb"); tie my %cdb1, 'CDB_File', $filename or die "tie failed: $!\n"; $self->key_to_id(\%cdb1); $filename = file($dir, "${name}_id_to_key.cdb"); tie my %cdb2, 'CDB_File', $filename or die "tie failed: $!\n"; $self->id_to_key(\%cdb2); $filename = file($dir, "${name}_id_to_value.cdb"); tie my %cdb3, 'CDB_File_Thawed', $filename or die "tie failed: $!\n"; $self->id_to_value(\%cdb3); $filename = file($dir, "${name}_word_to_id.cdb"); tie my %cdb4, 'CDB_File_Thawed', $filename or die "tie failed: $!\n"; $self->word_to_id(\%cdb4); $filename = file($dir, "${name}_id_to_related.cdb"); if (-f $filename) { tie my %cdb7, 'CDB_File_Thawed', $filename or die "tie failed: $!\n"; $self->id_to_related(\%cdb7); } } sub fetch { my ($self, $key) = @_; my $id = $self->key_to_id->{$key}; return unless $id; return $self->id_to_value->{$id}; } sub fetch_related { my ($self, $key) = @_; my $id_to_value = $self->id_to_value; my $id = $self->key_to_id->{$key}; return unless $id; my $ids = $self->id_to_related->{$id} || []; return map { $id_to_value->{$_} } @$ids; } sub fetch_related_keys { my ($self, $key) = @_; my $id_to_key = $self->id_to_key; my $id = $self->key_to_id->{$key}; return unless $id; my $ids = $self->id_to_related->{$id} || []; return map { $id_to_key->{$_} } @$ids; } sub search { my ($self, $words) = @_; my @ids = $self->_search_ids($words); my @values = map { $self->id_to_value->{$_} } @ids; return @values; } sub search_keys { my ($self, $words) = @_; my @ids = $self->_search_ids($words); my @keys = map { $self->id_to_key->{$_} } @ids; return @keys; } sub _search_ids { my ($self, $words) = @_; my $qp = Search::QueryParser->new; my $query = $qp->parse($words); return unless $query; my @union; foreach my $term (@{$query->{""}}) { my $value = $term->{value}; my @values = $self->stemmer->($value); push @union, $values[0]; } my @plus; foreach my $term (@{$query->{"+"}}) { my $value = $term->{value}; my @values = $self->stemmer->($value); push @plus, $values[0]; } my @minus; foreach my $term (@{$query->{"-"}}) { my $value = $term->{value}; my @values = $self->stemmer->($value); push @minus, $values[0]; } if ($self->and) { push @plus, @union; @union = (); } my $s = Set::Scalar->new; if (@union) { foreach my $word (@union) { next unless exists $self->word_to_id->{$word}; my @ids = @{ $self->word_to_id->{$word} }; $s->insert(@ids); } foreach my $word (@plus) { return unless exists $self->word_to_id->{$word}; my @ids = @{ $self->word_to_id->{$word} }; my $s2 = Set::Scalar->new(@ids); $s = $s->intersection($s2); } } else { my $word = pop @plus; my @ids = @{ $self->word_to_id->{$word} }; $s->insert(@ids); foreach my $word (@plus) { return unless exists $self->word_to_id->{$word}; my @ids = @{ $self->word_to_id->{$word} }; my $s2 = Set::Scalar->new(@ids); $s = $s->intersection($s2); } } foreach my $word (@minus) { next unless exists $self->word_to_id->{$word}; my @ids = @{ $self->word_to_id->{$word} }; $s = $s->delete(@ids); } return $s->members; } 1; __END__ =head1 NAME Search::Mousse - A simple and fast inverted index =head1 SYNOPSIS my $mousse = Search::Mousse->new( directory => $directory, name => 'recipes', ); my $recipe = $mousse->fetch("Hearty Russian Beet Soup"); my @recipes = $mousse->search("crumb"); my @recipe_keys = $mousse->search_keys("italian soup"); =head1 DESCRIPTION L provides a simple and fast inverted index. It is intended for constant databases (this is why it can be fast). Documents have a key, keywords (which the document can later be search for with) and a value (which can be a Perl data structure or object). Use L to construct a database. The default stemmer is: sub { my $words = lc shift; return uniq(split / /, $words); } Why is it called Search::Mousse? Well, in culinary terms, mousses are simple to make, can include quite complicated ingredients, and are inverted before presentation. =head1 CONSTRUCTOR =head2 new The constructor takes a few arguments: the directory to store files in, and a name for the database. If you have a custom stemmer, also pass it in: my $mousse = Search::Mousse->new( directory => $directory, name => 'recipes', ); my $mousse2 = Search::Mousse->new( directory => $directory, name => 'photos', stemmer => \&stemmer, ); =head1 METHODS =head2 and If this is set to true, query terms are ANDed by default. Thus "white bread" would be parsed the same as "+white +bread": $mousse->and(1); =head2 fetch Returns a value from the database, given a key: my $recipe = $mousse->fetch("Hearty Russian Beet Soup"); =head2 fetch_related If you have used L to analyse the database, the fetch_related() method returns a list of values that are similar to the given key: my @recipes = $mousse->fetch_related("Hearty Russian Beet Soup"); =head2 fetch_related_keys If you have used L to analyse the database, the fetch_related_keys() method returns a list of keys that are similar to the given key: my @keys = $mousse->fetch_related_keys("Hearty Russian Beet Soup"); =head2 search Returns a list of values that match the search terms (but see and()): my @white_or_bread = $mousse->search("white bread"); my @white_bread = $mousse->search("+white +bread"); my @nonwhite_bread = $mousse->search("-white +bread"); =head2 search_keys Returns a list of keys that have all the keywords passed: my @recipe_keys = $mousse->search_keys("italian soup"); =head1 SEE ALSO L, L =head1 AUTHOR Leon Brocard, C<< >> =head1 COPYRIGHT Copyright (C) 2005, Leon Brocard This module is free software; you can redistribute it or modify it under the same terms as Perl itself.