#AnyDBM handling from perlindex: # NDBM_File as LAST resort package AnyDBM_File; # hide from indexer use vars '@ISA'; @ISA = qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File) unless @ISA; my $mod; for $mod (@ISA) { last if eval "require $mod" }; package Tk::Pod::Search_db; use strict; use vars qw($VERSION); $VERSION = sprintf("%d.%02d", q$Revision: 5.6 $ =~ /(\d+)\.(\d+)/); use Carp; use Fcntl; use File::Basename qw(dirname); use File::Spec; use Text::English; use Config; my $PREFIX = $Config::Config{prefix}; # Bug in perlindex: because of assuming Unix directory separators the # index files are stored in man/man1, not in man on Windows: my $IDXDIR = $^O eq 'MSWin32' ? $Config::Config{man1dir} : dirname $Config::Config{man1dir}; $IDXDIR ||= $PREFIX; # use perl directory if no manual directory exists # Debian uses a non-standard directory: if (-e "/etc/debian_version" && -d "/var/cache/perlindex") { $IDXDIR = "/var/cache/perlindex"; # XXX What to do if perlindex is installed by the user and uses # the man directory for storing the index files? } # Deliberately ignore the INDEXDIR environment variable which is used # by perlindex sub new { my $class = shift; my $idir = shift; $idir ||= $IDXDIR; my (%self, %IF, %IDF, %FN); my $if_file = File::Spec->catfile($idir, "index_if"); tie (%IF, 'AnyDBM_File', $if_file, O_RDONLY, 0644) or confess "Could not tie $if_file: $!\n". "Did you install Text::English and run 'perlindex -index'?\n"; my $idf_file = File::Spec->catfile($idir, "index_idf"); tie (%IDF, 'AnyDBM_File', $idf_file, O_RDONLY, 0644) or confess "Could not tie $idf_file: $!\n"; my $fn_file = File::Spec->catfile($idir, "index_fn"); tie (%FN, 'AnyDBM_File', $fn_file, O_RDONLY, 0644) or confess "Could not tie $fn_file: $!\n"; $self{IF} = \%IF; $self{IDF} = \%IDF; $self{FN} = \%FN; #xxx: -idir depended but where can I get this info? # o A fourth index file? # o todo: check perlindex index routine $self{PREFIX} = $PREFIX; bless \%self, $class; } # changes to perlindex's normalize # o removed useless(?) stemmer check # o lexicalized $word sub normalize { my $line = join ' ', @_; my @result; $line =~ tr/A-Z/a-z/; $line =~ tr/a-z0-9/ /cs; my $word; for $word (split ' ', $line ) { $word =~ s/^\d+//; next unless length($word) > 2; push @result, &Text::English::stem($word); } @result; } # changes for perlindex's search slightly modified sub searchWords { my($self, $_word, %args) = @_; my $restrict_pod = $args{-restrictpod}; if (defined $restrict_pod) { my(@modparts) = split /::/, $restrict_pod; $restrict_pod = join('[/\\\\]', map { quotemeta } @modparts); } #print "try words|", join('|',@_),"\n"; my %score; my $maxhits = 50; my (@unknown, @stop); my $IF = $self->{IF}; my $IDF = $self->{IDF}; my $FN = $self->{FN}; #xxx &initstop if $opt_verbose; my ($did, %post); #xxx TRY: { my($word) = normalize($_word); unless ($IF->{$word}) { #xxxif ($stop{$word}) { #xxx push @stop, $word; #xxx} else { #xxx push @unknown, $word; #xxx} next; } #my %post = unpack($p.'*',$IF->{$word}); %post = unpack('w*',$IF->{$word}); my $idf = log($FN->{'last'}/$IDF->{$word}); for $did (keys %post) { #xxx my ($maxtf) = unpack($p, $FN->{$did}); my ($maxtf) = unpack('w', $FN->{$did}); $score{$did} = 0 unless defined $score{$did}; # perl -w $score{$did} += $post{$did} / $maxtf * $idf; } } my @results; for $did (sort {$score{$b} <=> $score{$a}} keys %score) { my ($mtf, $path) = unpack('wa*', $FN->{$did}); next if ($restrict_pod && $path !~ /$restrict_pod/); $path = File::Spec->catfile($self->prefix, $path) unless $^O eq 'MSWin32'; # This seems to be a perlindex bug in MSWin32 push @results, $score{$did}, $path; last unless --$maxhits; } #print "results|", join('|',@results),"\n"; @results; } sub prefix { shift->{PREFIX}; } 1; __END__ =encoding iso-8859-2 =head1 NAME Tk::Pod::Search_db - dirty OO wrapper for C's search functionality =head1 SYNOPSIS ** THIS IS ALPHA SOFTWARE everything may and should change ** ** stuff here is more a scratch pad than docomentation! ** use Tk::Pod::Search_db; ... $idx = Tk::Pod::Search_db->new?(INDEXDIR)?; ... @hits = $idx->searchWords(WORD1,...); # @hits is a list of # relpath1,score1,... where # score is increasing $prefix = $idx->prefix(); @word = Tk::Pod::Search_db::normalize(STRING1,...); =head1 DESCRIPTION Module to search Pod documentation. Before you can use the module one should create the indices with C. =head1 MISSING Enable options like -maxhits (currently = 15). Solve PREFIX dependency. Interface for @stop and @unknown also as methods return lists for last searchWords call? Lots more ... =head1 METHODS =over 4 =item $idx = Tk::Pod::Search_db->new(INDEXDIR) Interface may change to support options like -maxhits =item $idx->seachWords(WORD1?,...?) search for WORD(s). Return a list of relpath1, score1, relpath2, score2, ... or empty list if no match is found. =item $pathprefix = $idx->pathprefix() The return path prefix and C<$relpath> give together the full path name of the Pod documentation. $fullpath = $patchprefix . '/' . $relpath B Should make it easy to use Tk::Pod::Search with perlindex but index specific prefix handling is a mess up to know. =back =head1 SEE ALSO L, L, L, L =head1 AUTHORS Achim Bohnet > Most of the code here is borrowed from L written by Ulrich Pfeifer >. Current maintainer is Slaven Reziæ >. Copyright (c) 1997-1998 Achim Bohnet. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut