############################################################################## # The Faq-O-Matic is Copyright 1997 by Jon Howell, all rights reserved. # # # # This program is free software; you can redistribute it and/or # # modify it under the terms of the GNU General Public License # # as published by the Free Software Foundation; either version 2 # # of the License, or (at your option) any later version. # # # # This program is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details. # # # # You should have received a copy of the GNU General Public License # # along with this program; if not, write to the Free Software # # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.# # # # Jon Howell can be contacted at: # # 6211 Sudikoff Lab, Dartmouth College # # Hanover, NH 03755-3510 # # jonh@cs.dartmouth.edu # # # # An electronic copy of the GPL is available at: # # http://www.gnu.org/copyleft/gpl.html # # # ############################################################################## use strict; ### SearchMod.pm ### ### Support for search functions ### package FAQ::OMatic::SearchMod; use FAQ::OMatic::Item; use FAQ::OMatic::I18N; use FAQ::OMatic::Words; use FAQ::OMatic; sub openWordDB { return if (defined FAQ::OMatic::getLocal('wordDB')); my $wordDBtoken; if (FAQ::OMatic::usedbm()) { my %wordDB; if (not dbmopen (%wordDB, "$FAQ::OMatic::Config::metaDir/search", 0400)) { FAQ::OMatic::gripe('abort', "Can't open dbm search database. " ."Have you run buildSearchDB? (Should I?)"); } $wordDBtoken = \%wordDB; } else { if (not open(OFFSETFILE, "<$FAQ::OMatic::Config::metaDir/search.offset")) { FAQ::OMatic::gripe('abort', "Can't open search.offset. " ."Have you run buildSearchDB? (Should I?)"); } $wordDBtoken = "files_are_open_yahoo!"; } if (not open(WORDSFILE, "<$FAQ::OMatic::Config::metaDir/search.words")) { FAQ::OMatic::gripe('abort', "Can't open search.words. " ."Have you run buildSearchDB? (Should I?)"); } if (not open(INDEXFILE, "<$FAQ::OMatic::Config::metaDir/search.index")) { FAQ::OMatic::gripe('abort', "Can't open search.index. " ."Have you run buildSearchDB? (Should I?)"); } FAQ::OMatic::setLocal('wordDB', $wordDBtoken); } sub closeWordDB { my $wordDB = FAQ::OMatic::getLocal('wordDB'); if (FAQ::OMatic::usedbm()) { dbmclose %{$wordDB}; undef %{$wordDB}; } else { close OFFSETFILE; } close WORDSFILE; close INDEXFILE; } # linear scan of .offset file, looking for $word. Pretty slow, # unless your dbm implementation is somehow very slow and broken, # as on sourceforge. sub scanOffsets { my $word = shift; seek (OFFSETFILE, 0, 0); my $line; # THANKS to Gary.Frost@ubsw.com for reporting the "Value of # construct can be "0"" error that occurs on his version # of perl; fixed here and elsewhere. while (defined($line = )) { chomp $line; my ($fileWord, $pair) = split(' ', $line, 2); if ($fileWord eq $word) { #FAQ::OMatic::gripe('debug', "found pair: $line"); return $pair; } } return undef; } sub getIndices { my $word = shift; my $pair; if (FAQ::OMatic::usedbm()) { my $wordDB = FAQ::OMatic::getLocal('wordDB'); $pair = $wordDB->{$word}; } else { $pair = scanOffsets($word); } # returns indexseek,wordseek pair # THANKS to Vicki Brown and jon * # for reporting unitialized value errors in this code. return defined($pair) ? split(' ', $pair) : (undef,undef); } sub getWordClass { my $word = shift; my @wordclass = (); openWordDB(); my ($indexseek, $wordseek) = getIndices($word); #FAQ::OMatic::gripe('debug', "got seeks $indexseek and $wordseek for $word"); if (defined $indexseek) { #grab all words in wordsfile with $word as a prefix seek WORDSFILE, $wordseek, 0; while (defined($_ = )) { chomp; if (m/^$word/) { push @wordclass, $_; } else { last; } } } return \@wordclass; } sub getMatchesForClass { my $classref = shift; # array ref for a class of "identical" words my %files; my $word; foreach $word (@{$classref}) { my ($indexseek,$wordseek) = getIndices($word); next if (not defined $indexseek); seek INDEXFILE, $indexseek, 0; while (defined($_ = )) { chomp; last if (m/^END$/); $files{$_}=1; } } my @matches = sort keys %files; return \@matches; } sub getMatchesForSet { my $params = shift; my $setref = $params->{'_searchArray'}; # array ref for complete set of user words to search $params->{'_minMatches'} = 'all' if ($params->{'_minMatches'} eq ''); my $minhits = $params->{'_minMatches'}; # we return only files with at least this many hits if ($minhits eq 'all') { # convert symbolic hits to a number $minhits = scalar(@{$setref}); } $params->{'_actualMatches'} = $minhits; my %accumulator=(); my @hitfiles=(); my ($word, $file); foreach $word (@{$setref}) { my $classref = getWordClass($word); my $matches = getMatchesForClass($classref); foreach $file (@{$matches}) { $accumulator{$file}++; } } foreach $file (sort keys %accumulator) { if ($accumulator{$file} >= $minhits) { push @hitfiles, $file; } } return \@hitfiles; } sub convertSearchParams { my $params = shift; my $pattern; my $encode_lang = FAQ::OMatic::I18N::language(); # given a user-input search string, we break it into "legal" words # and store it in another parameter. $pattern = $params->{'_search'}; if($encode_lang eq "ja_JP.EUC") { require NKF; import NKF; $pattern = nkf('-e', $pattern); } my @patternwords = FAQ::OMatic::Words::getWords( $pattern ); $params->{'_searchArray'} = \@patternwords; } sub addNewFiles { my $wordset = shift; #ary ref my $fileset = shift; # hash ref -- where to add results my $words = {}; # Get the list of files touched since last searchDB build if (not open HINTS, "<$FAQ::OMatic::Config::metaDir/searchHints") { # sorry, can't help ya return; } my @touchedFiles = ; close HINTS; # index each item my $filename; my $item; foreach $filename (@touchedFiles) { chomp $filename; $item = new FAQ::OMatic::Item($filename); $item->extractWords($words); } # for every word in the wordset, add all the files it appears # in to the fileset passed to us. my $word; foreach $word (@{$wordset}) { if ($words->{$word}) { foreach $filename (keys %{$words->{$word}}) { $fileset->{$filename} = 1; } } } # notice that if there were suffixes of the user's requested words # in the new content that weren't in the system anywhere when the # searchDB was built, then those suffixes won't be in the wordset, # and the search will miss them. Hey, wah, this is better than # missing ALL the new content, okay? :v) # this also screws up the counts of how many matches this file # had (since it could contribute matches from the searchDB lookup # AND the newFiles lookup), so I'm going to leave it turned off # for now. Rats. } sub getRecentSet { my $params = shift; my $recentList = []; my $durationdays = $params->{'_duration'}; # used directly to compare against perl's floating-point -M file test my $then = time() - $durationdays*24*60*60; # Used to compare against LastModifiedSecs field. # By 'days' we mean 24-hour periods, not calendar days. # (In the US, for example, there is a 23-hour calendar day in # April and a 25-hour one in the fall, what, in October? for daylight # savings time.) my $filei; foreach $filei (FAQ::OMatic::getAllItemNames()) { # use file time as a hint for which items we even need to open up. next if (-M "$FAQ::OMatic::Config::itemDir/$filei" >= $durationdays); # ...but only trust LastModifiedSecs field for final say on mod time. my $item = new FAQ::OMatic::Item($filei); my $lm = $item->{'LastModifiedSecs'} || 0; if ($lm > $then) { push @{$recentList}, $filei; } } return $recentList; } # reasonable text for 'n' days my %dayMap = ( 0 => gettext("zero days"), 1 => gettext("day"), 2 => gettext("two days"), 7 => gettext("week"), 14 => gettext("fortnight"), 31 => gettext("month"), # (31? a month, give or take. :v) 92 => gettext("three months"), 184 => gettext("six months"), 366 => gettext("year") ); sub getRecentMap { # get a copy of the day map (except for 0) # for use in creating the recent form my %recentMap = %dayMap; delete $recentMap{0}; return \%recentMap; } sub textDays { my $duration = shift || 0; my $textDayStr = $dayMap{$duration} || $duration." ".gettext("days"); return $textDayStr; } 1;