The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/local/bin/perl 
# -*- Perl -*- 		# Thu Apr 29 13:03:57 CDT 2004 
###############################################################################
# Written by Tim Skirvin <tskirvin@killfile.org>.  Copyright 2004, Tim Skirvin. 
# Redistribution terms are below.
###############################################################################
our $VERSION = "0.12";

###############################################################################
### User Configuration ########################################################
###############################################################################
use vars qw( $LOCALCONF $KIBOZEDIR $DB_TYPE $ARCHIVEGROUP $DEBUG $QUIET
	      $VERBOSE $MAX );

## Rather than having everything in this shared configuration, load this
## file to get additional configuration.  This file contains additional
## perl.  

$LOCALCONF = "$ENV{'HOME'}/.kibozerc";

## Where should we store all of our files?  This needs to be set or 
## nothing will run; also, the directory must already exist.

$KIBOZEDIR = "";

## What kind of database should we use to store history information?

$DB_TYPE  = "DB_File";

## If we're storing everything to an archive group, you can set it here.

$ARCHIVEGROUP = "";

## Do we want to print debugging information?  Use verbose status messages?
## Be entirely silent?  Set these things here.

$DEBUG    = 0;
$VERBOSE  = 0;
$QUIET    = 0;

## How many articles should we get, maximum?  Set to '0' to just get all
## of them.

$MAX = 0;

## If the modules are set up in a non-standard place, edit this line 
## as appropriate.
BEGIN { use lib '/home/tskirvin/dev/news-archive'; }

###############################################################################
### main() ####################################################################
###############################################################################

use strict;		# Good programming is our friend
use warnings;		

use Getopt::Std;	# Command line functions
use News::Archive;	# News functions
use News::Article;	
use File::Find;

use vars qw/*name *dir *prune/;
*name   = *File::Find::name;
*dir    = *File::Find::dir;
*prune  = *File::Find::prune;

sub wanted;

# Error Codes
our %ERROR = ( 'SUCCESS' => 0, 'CONFIG' => 1, 'SERVER' => 2 );

# Command-line configuration
use vars qw( %OPTS );
getopts('hvVQdc:j:a:', \%OPTS);

# Load local configuration from local configuration file
$LOCALCONF = $OPTS{'c'} if $OPTS{'c'};
if ( $LOCALCONF && -r $LOCALCONF ) { do $LOCALCONF }

&Usage 	 if $OPTS{'h'};		# Print usage information and exit
&Version if $OPTS{'v'};		# Print version information and exit

# Other command-line parsing; overrides localconf
$DEBUG   = $OPTS{'d'} if defined $OPTS{'d'};
$VERBOSE = $OPTS{'V'} || $DEBUG || $VERBOSE || 0;	# Give verbose messages?
$QUIET   = $OPTS{'Q'} 	        || $QUIET   || 0;
$ARCHIVEGROUP = $OPTS{'A'} if defined $OPTS{'A'};

# Create the options hash to start a News::Archive object
our %OPTHASH;  
$OPTHASH{ 'basedir' } = $KIBOZEDIR  if $KIBOZEDIR;
$OPTHASH{ 'db_type' } = $DB_TYPE    if $DB_TYPE;
$OPTHASH{ 'debug'   } = $DEBUG      if $DEBUG;

# Create the News::Archive object
our $archive = new News::Archive ( %OPTHASH ) 
	or Exit('SERVER', "Couldn't create/load archive item: ",
					     	News::Archive->error);

# If we've got $ARCHIVEGROUP set, then make sure we're subscribed to it.
$archive->subscribe($ARCHIVEGROUP) if ($ARCHIVEGROUP);

my $directory = shift @ARGV || Usage();

Exit('CONFIG', "No such directory: $directory") unless (-d $directory);

$|++; 

our $COUNT = 0;
File::Find::finddepth({wanted => \&wanted}, $directory);
my $groups = $archive->activefile;
my $hash = $groups->groups;
# foreach (keys %{$hash}) { print "$_: $$hash{$_}\n"; }
# foreach (keys %{$groups}) { print "$_: $$groups{$_}\n"; }
$archive->close;
Exit('SUCCESS', "$COUNT articles archived");

sub archive {
  my ($file) = @_;
  my $article = new News::Article;  $article->read($file);
  return undef unless $article;
  my $messageid = $article->header('message-id');

  # We need at least this many more headers to do anything with it
  foreach ( qw ( Message-ID from newsgroups subject date ) ) { 
    next if $article->header($_);
    warn "No header $_\n" if $VERBOSE;
    return undef;
  }

  # If this article has already been processed, skip it
  if ( $archive->article( $messageid ) ) {
    warn "Already processed '$messageid'\n" if $VERBOSE; 
    return undef;
  }
  
  # Debugging hook; don't worry about this in general.
  # $article->write(\*STDOUT);

  # Get the list of groups we're supposed to be saving the article into
  my @groups = split('\s*,\s*', $article->header('newsgroups') );
  map { s/\s+//g } @groups;

  # Make sure we're subscribed to all these groups
  foreach (@groups) { $archive->subscribe($_) }
  push @groups, $ARCHIVEGROUP if $ARCHIVEGROUP;	# We're subscribed already

  # Actually save the article.
  my $ret = $archive->save_article( 
	[ @{$article->rawheaders}, '', @{$article->body} ], @groups );
  $COUNT++ if $ret;
  if ($VERBOSE) { warn $ret ? "Accepted article $messageid\n"
			    : "Couldn't save article $messageid\n" }
  $ret ? 1 : 0;
}

###############################################################################
### Subroutines ###############################################################
###############################################################################

sub wanted {
  my ($dev,$ino,$mode,$nlink,$uid,$gid);
  return "" if ($MAX > 0 && $COUNT >= $MAX);
  (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && ! -d _ && /^[0-9]+$/s
	&& archive($name)
	&& ( print "$name\n" );
}

## Usage ()
# Prints out a short help file and exits.
sub Usage {
  my $prog = $0; $prog =~ s%.*/%%g;		# Clean up the program name
  Exit('CONFIG', version($VERSION), <<EOM);
Archives news articles for later use with News::Archive

$prog is part of the News::Archive package, which is used to archive 
news articles in a reasonably efficient manner.  This particular script
is meant for use for personal archives, loading articles from an mbox-style
archie on STDIN and loading them into @{[ $KIBOZEDIR || "(unset)" ]}.

Configuration information is stored in the below configuration file;
please read the manual pages for more information on its configuration.

Usage: $prog [-hvVQd] [-c configfile] [-a groupname] < ARTICLE
	-h		Print this usage information and exit.
	-v		Print version information and exit.
	-c configfile	Load this configuration file instead of default.
			  Default: $LOCALCONF
	-a groupname	Add this group to all articles we download, so
			that they can be accessed uniformly.  
			  Default: @{[ $ARCHIVEGROUP || "Not Set" ]}
	-V		Verbose; print information on every article.
			  Default: @{[ $VERBOSE ? "On" : "Off" ]}
	-Q		Quiet mode; only print that which is absolutely
			  necessary (and even then think about it).
			  Default: @{[ $QUIET ? "On" : "Off" ]}
	-d		Debug; print debugging information (implies -V)
			  Default: @{[ $DEBUG ? "On" : "Off" ]}
EOM
}

## Version ()
# Prints out the version number and exits
sub Version { Exit('CONFIG', version($VERSION)) }

## Exit ( CODE, REASON )
# Exits the program cleanly with a proper error message
sub Exit {
  my ($code, $reason, @details) = @_;
  exit $ERROR{$code} unless $reason;
  # map { $_ = " - $_" } @details;
  $reason = join("\n", $reason, @details, '');
  warn $reason if (!$QUIET && $reason);
  exit $ERROR{$code};
}

## version ( [VERSION] )
# Returns the program name and version number
sub version {
  my $version = shift || $VERSION || "unknown";
  my $prog = $0; $prog =~  s%.*/%%g;		# Clean up the program name
  "$prog v$VERSION"
}

###############################################################################
### Documentation #############################################################
###############################################################################

=head1 NAME

newsrecurse.pl - Recursively archives news articles with News::Archive

=head1 SYNOPSIS

newsrecurse.pl [-hvVQd] [-c configfile] [-a groupname] PATH

=head1 DESCRIPTION

newsrecurse.pl is a part of the News::Archive package, which is used to
archive news articles in a reasonably efficient manner.  This particular
script is meant for use for loading past archives, loading them
recursively from all-numeric filenames in the directory structure at PATH
and loading them into $KIBOZEDIR.  

Configuration information is stored in the configuration file
(B<kibozerc>).  Command-line options are as follows:

=over 4

=item -h

Print this usage information and exit.

=item -v

Print version information and exit.

=item -d	

Debug; print debugging information (implies -V)

=item -c configfile	

Load this configuration file instead of default (~/.kibozerc).

=item -a groupname

Add this group to all articles we download, so that they can be accessed
uniformly.  

=item -V

Verbose; print information on every article.

=item -Q

Quiet mode; only print that which is absolutely necessary (and even then
think about it).

=back

=head1 NOTES

This was designed to work with the output of old kiboze.pl, which in turn
was compatible with the "classic" style INN file layout.  If you've got an
old news spool sitting around, this is the script you can use to start
looking at it.

=head1 REQUIREMENTS

B<News::Archive>

=head1 SEE ALSO

B<kibozerc>, B<kiboze.pl>

=head1 AUTHOR

Tim Skirvin <tskirvin@killfile.org>

=head1 LICENSE

This code may be redistributed under the same terms as Perl itself.

=head1 HOMEPAGE

http://www.killfile.org/~tskirvin/software/news-archive/

=head1 COPYRIGHT

Copyright 2003-2004, Tim Skirvin <tskirvin@killfile.org>

=cut

###############################################################################
### Version History ###########################################################
###############################################################################
# v0.11		Thu Apr 29 13:03:44 CDT 2004 
### First real version.
# v0.12		Tue May 25 14:40:26 CDT 2004 
### Offered some ways to only get a subset of the articles.