#AnyDBM handling from perlindex: # NDBM_File as LAST resort package AnyDBM_File; # hide from indexer use vars '@ISA'; my @try = qw(DB_File GDBM_File SDBM_File ODBM_File NDBM_File) unless @ISA; my $mod; for $mod (@try) { if (eval "require $mod") { @ISA = $mod; last; } }; package Tk::Pod::Search_db; use strict; use vars qw($VERSION); $VERSION = sprintf("%d.%02d", q$Revision: 5.7 $ =~ /(\d+)\.(\d+)/); use Carp; use Fcntl; use File::Basename qw(dirname); use File::Spec; use Text::English; use Config; my $PREFIX = $Config::Config{prefix}; # Bug in perlindex: because of assuming Unix directory separators the # index files are stored in man/man1, not in man on Windows: my $IDXDIR = $^O eq 'MSWin32' ? $Config::Config{man1dir} : dirname $Config::Config{man1dir}; $IDXDIR ||= $PREFIX; # use perl directory if no manual directory exists # Debian uses a non-standard directory: if (-e "/etc/debian_version" && -d "/var/cache/perlindex") { $IDXDIR = "/var/cache/perlindex"; # XXX What to do if perlindex is installed by the user and uses # the man directory for storing the index files? } # Deliberately ignore the INDEXDIR environment variable which is used # by perlindex sub new { my $class = shift; my $idir = shift; $idir ||= $IDXDIR; my (%self, %IF, %IDF, %FN); my $if_file = File::Spec->catfile($idir, "index_if"); tie (%IF, 'AnyDBM_File', $if_file, O_RDONLY, 0644) or confess "Could not tie $if_file: $!\n". "Did you install Text::English and run 'perlindex -index'?\n"; my $idf_file = File::Spec->catfile($idir, "index_idf"); tie (%IDF, 'AnyDBM_File', $idf_file, O_RDONLY, 0644) or confess "Could not tie $idf_file: $!\n"; my $fn_file = File::Spec->catfile($idir, "index_fn"); tie (%FN, 'AnyDBM_File', $fn_file, O_RDONLY, 0644) or confess "Could not tie $fn_file: $!\n"; $self{IF} = \%IF; $self{IDF} = \%IDF; $self{FN} = \%FN; #xxx: -idir depended but where can I get this info? # o A fourth index file? # o todo: check perlindex index routine $self{PREFIX} = $PREFIX; bless \%self, $class; } # changes to perlindex's normalize # o removed useless(?) stemmer check # o lexicalized $word sub normalize { my $line = join ' ', @_; my @result; $line =~ tr/A-Z/a-z/; $line =~ tr/a-z0-9_/ /cs; my $word; for $word (split / /, $line ) { $word =~ s/^\d+//; next unless length($word) > 2; push @result, &Text::English::stem($word); } @result; } # changes for perlindex's search slightly modified sub searchWords { my($self, $term, %args) = @_; my @words = split / /, $term; my $restrict_pod = $args{-restrictpod}; if (defined $restrict_pod) { my(@modparts) = split /::/, $restrict_pod; $restrict_pod = join('[/\\\\]', map { quotemeta } @modparts); } #print "try words|", join('|',@_),"\n"; my $p = 'w'; my %score; my %termhits; my $maxhits = 50; my (@unknown, @stop); my $IF = $self->{IF}; my $IDF = $self->{IDF}; my $FN = $self->{FN}; #&initstop if $opt_verbose; for my $word (normalize(@words)) { unless ($IF->{$word}) { # if ($stop{$word}) { # push @stop, $word; # } else { # push @unknown, $word; # } next; } my %post = unpack($p.'*',$IF->{$word}); my $idf = log($FN->{'last'}/$IDF->{$word}); for my $did (keys %post) { my ($maxtf) = unpack($p, $FN->{$did}); $score{$did} = 0 unless defined $score{$did}; # perl -w $score{$did} += $post{$did} / $maxtf * $idf; $termhits{$did}++; } } my @results; for my $did (sort { $termhits{$b} <=> $termhits{$a} || $score{$b} <=> $score{$a} } keys %score) { my ($mtf, $path) = unpack($p.'a*', $FN->{$did}); next if ($restrict_pod && $path !~ /$restrict_pod/); $path = File::Spec->catfile($self->prefix, $path) unless $^O eq 'MSWin32'; # This seems to be a perlindex bug in MSWin32 push @results, { termhits => $termhits{$did}, score => $score{$did}, path => $path }; last unless --$maxhits; } #print "results|", join('|',@results),"\n"; @results; } sub prefix { shift->{PREFIX}; } 1; __END__ =encoding iso-8859-2 =head1 NAME Tk::Pod::Search_db - dirty OO wrapper for C's search functionality =head1 SYNOPSIS ** THIS IS ALPHA SOFTWARE everything may and should change ** ** stuff here is more a scratch pad than docomentation! ** use Tk::Pod::Search_db; ... $idx = Tk::Pod::Search_db->new?(INDEXDIR)?; ... @hits = $idx->searchWords(WORD1,...); # @hits is a list of # relpath1,score1,... where # score is increasing $prefix = $idx->prefix(); @word = Tk::Pod::Search_db::normalize(STRING1,...); =head1 DESCRIPTION Module to search Pod documentation. Before you can use the module one should create the indices with C. =head1 MISSING Enable options like -maxhits (currently = 15). Solve PREFIX dependency. Interface for @stop and @unknown also as methods return lists for last searchWords call? Lots more ... =head1 METHODS =over 4 =item $idx = Tk::Pod::Search_db->new(INDEXDIR) Interface may change to support options like -maxhits =item $idx->seachWords(WORD1?,...?) search for WORD(s). Return a list of relpath1, score1, relpath2, score2, ... or empty list if no match is found. =item $pathprefix = $idx->pathprefix() The return path prefix and C<$relpath> give together the full path name of the Pod documentation. $fullpath = $patchprefix . '/' . $relpath B Should make it easy to use Tk::Pod::Search with perlindex but index specific prefix handling is a mess up to know. =back =head1 SEE ALSO L, L, L, L =head1 AUTHORS Achim Bohnet > Most of the code here is borrowed from L written by Ulrich Pfeifer >. Current maintainer is Slaven Reziæ >. Copyright (c) 1997-1998 Achim Bohnet. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut