#!/usr/bin/perl -w use strict; require HTML::Index::Store::BerkeleyDB; require HTML::Index::Document; use Getopt::Long; use vars qw( $opt_parser $opt_refresh $opt_block $opt_db $opt_compress $opt_stopword ); die "Usage: $0 [-parser ] [-stopword ] [-block <8|16|32>] [-refresh] [-db ] [-compress]\n" unless GetOptions qw( stopword=s db=s block=i refresh compress parser=s ) ; my $store = HTML::Index::Store::BerkeleyDB->new( VERBOSE => 1, PARSER => $opt_parser || 'html', DB => $opt_db, STOP_WORD_FILE => $opt_stopword, COMPRESS => $opt_compress, REFRESH => $opt_refresh, ); my $i = 0; my $t0 = time; for my $file ( @ARGV ) { my $doc = HTML::Index::Document->new( path => $file ); $store->index_document( $doc ); $i++; } my $dt = time - $t0; my $fps = $dt ? $i / $dt : ">" . $i; print "$i files indexed in $dt seconds ($fps files per second)\n";