#!/usr/bin/perl -w use strict; ## Scott Wiersdorf ## Created: Fri May 18 12:41:29 MDT 2001 ## $SMEId: local/savelogs/script/savelogs,v 1.12 2004/08/30 18:02:43 scottw Exp $ ## savelogs use File::Basename; use POSIX (); use Cwd qw( abs_path ); use File::Copy; use vars qw($VERSION $BUILD); my $prognam = $0; $prognam =~ s!^.*/([^/]+)$!$1!; $VERSION = '1.92'; $BUILD = sprintf("%d/%02d/%02d", q$Id: savelogs,v 1.5 2008/12/19 05:18:00 scott Exp $ =~ m!(\d{1,4})/(\d{1,2})/(\d{1,2})!); ## logging levels; debugging levels are for displaying data values ## during execution use constant LEVEL0 => 0; ## no output except fatal errors use constant LEVEL1 => LEVEL0 + 1; ## LEVEL0 + start/finish stats, errors use constant LEVEL2 => LEVEL1 + 1; ## LEVEL1 + warnings, logfiles to process use constant LEVEL3 => LEVEL2 + 1; ## LEVEL2 + chdir, filter, phase completion use constant LEVEL4 => LEVEL3 + 1; ## LEVEL3 + phase core actions, phase beginning use constant LEVEL5 => LEVEL4 + 1; ## LEVEL4 + everything else use constant DEF_COUNT => 10; ## default count level (how many logs to preserve) my @logs = (); ## the list of logs we're processing this session ## -- set configuration defaults -- ## ## new options MUST be all lowercase my %config = ( ## general options 'help' => undef, 'version' => undef, 'debug' => LEVEL0, 'dry-run' => undef, 'home' => ($< ? (getpwuid($<))[7] : '/' ), 'config' => undef, 'process' => 'move,compress', ## logging options 'loglevel' => LEVEL0, 'logfile' => 'stdout', ## finding logs 'apacheconf' => undef, 'apachelog' => 'TransferLog|ErrorLog|AgentLog|RefererLog|CustomLog', 'apachelogexclude' => [ qw(^/dev/null$ \|) ], 'apacheinclude' => undef, 'apachehost' => [], 'log' => [], 'nolog' => [], 'gripe' => 1, ## moving logs 'size' => undef, 'sep' => '.', 'ext' => undef, 'datefmt' => '%y%m%d', 'hourly' => undef, 'postmovehook' => undef, 'force-pmh' => undef, 'chown' => undef, 'chmod' => undef, ## stemming logs 'stem' => 'today', 'stemhook' => undef, 'stemlink' => 'symbolic', ## filters 'filter' => undef, 'postfilterhook' => undef, 'force-pfh' => undef, ## archiving and compressing 'gtar' => undef, 'tar' => undef, 'archive' => undef, 'full-path' => undef, 'touch' => undef, 'count' => DEF_COUNT, 'period' => undef, 'gzip' => undef, 'compress' => undef, 'uncompress' => undef, 'clobber' => 1, ); ## -- fetch command-line options -- ## use Getopt::Long; my %opt = (); unless( GetOptions(\%opt, 'help|h', 'version', 'debug=i', 'settings', 'dry-run', 'home=s', 'config=s', 'process=s', 'loglevel=i', 'logfile:s', 'apacheconf=s', 'apachelog=s', 'apachelogexclude:s', \@{$opt{'apachelogexclude'}}, 'apacheinclude', 'apachehost:s', \@{$opt{'apachehost'}}, 'log:s', \@{$opt{'log'}}, 'nolog:s', \@{$opt{'nolog'}}, 'gripe!', 'size:i', 'sep:s', 'ext:s', 'datefmt=s', 'hourly!', 'chown=s', 'chmod=s', 'postmovehook=s', 'force-pmh', 'stem:s', 'stemhook=s', 'stemlink=s', 'filter=s', 'postfilterhook=s', 'force-pfh', 'gtar=s', 'tar=s', 'archive=s', 'full-path', 'touch!', 'count=i', 'period:i', 'gzip=s', 'compress=s', 'uncompress=s', 'clobber!', ) ) { usage(); } my $WHICH = find_which(); ## this must occur before find_binary my $SEQ_NO = 0; ## starting log sequence number my $SEP = '.'; my ($GTAR, $TAR, $ZIP, $GZIP, $COMPRESS, $UNCOMPRESS); ## -- check for version -- ## if( $opt{'version'} ) { die <<_VERSION_; ## diversion? This is $prognam version $VERSION ($BUILD) _VERSION_ } ## -- check for usage -- ## usage() if $opt{'help'}; ## -- parse configuration file and set defaults from it -- ## if( $opt{'config'} ) { ## solve chicken/egg problem if( $opt{'debug'} ) { $config{'debug'} = $opt{'debug'}; } $opt{'home'} = ( $opt{'home'} ? $opt{'home'} : $config{'home'} ); ## we need these clean NOW clean_home( $opt{'home'} ); clean_path( $opt{'config'} ); my $config = $opt{'home'} . $opt{'config'}; debug( LEVEL1, "CONF: Config file option specified. Checking for '$config'" ); ## parse config file debug( LEVEL1, "CONF: Config file '$config' found. Parsing..." ); parse_config($config, \%config); debug( LEVEL1, "CONF: Config file parsing complete." ); } ## -- parse command-line options and set defaults from it -- ## parse_command_line( \%config ); if( scalar(@{$config{'apachehost'}}) ) { $config{'have_apache_hosts'} = 1; } ## -- cleanup paths -- ## clean_home( $config{'home'} ); clean_path( $config{'logfile'}, $config{'archive'}, $config{'config'}, $config{'apacheconf'} ); ## -- open our log. After this point we may begin to issue write_log ## statements -- ## unless( $opt{'settings'} ) { open_log(); write_log( LEVEL1, "Savelogs begins." ); } ## wherein we do all our fudging to get things just right DEFAULTS: { ## assign the value of period to count; this would be nice to put in ## the post-processing section below but we have to guarantee that ## this assignment occurs before we iterate over %config if( $config{'period'} ) { $config{'count'} = $config{'period'}; } ## fixup size from kb to bytes if( defined $config{'size'} ) { $config{'size'} *= 1024; } ## if the filter option is specified, add the filter process option if( defined $config{'filter'} ) { my @opts = split(',', $config{'process'}); ## check for 'none' or an empty process option: assign filter if( lc($config{'process'}) eq 'none' || scalar(@opts) == 0 ) { @opts = qw(filter); } ## check for 'filter' or 'all': leave this alone elsif( $config{'process'} =~ /\bfilter\b/i || $config{'process'} =~ /\ball\b/i ) { ; # do nothing } ## check for 'move' process option: save 'move' and unshift filter elsif( $config{'process'} =~ /\bmove\b/i ) { my $tmp = shift @opts; unshift @opts, 'filter'; unshift @opts, $tmp; } ## no 'move' found: tack 'filter' to the front of the options else { unshift @opts, 'filter'; } ## rebuild process options $config{'process'} = join(',', @opts); } ## if the period option is given, force 'move,compress' process ## options if( defined $config{'period'} ) { if( $config{'process'} =~ /\bfilter\b/i ) { $config{'process'} = 'move,filter,compress'; } else { $config{'process'} = 'move,compress'; } } ## if the archive option is given, add the archive process option if( 'archive' && defined $config{'archive'} ) { my @opts = split(',', $config{'process'}); ## check for 'none' or empty process option: assign archive if( lc($config{'process'}) eq 'none' || scalar(@opts) == 0 ) { @opts = qw(archive); } ## check for 'archive' or 'all': leave this alone elsif( $config{'process'} =~ /\barchive\b/i || $config{'process'} =~ /\ball\b/i ) { ; # do nothing } ## check for 'move' and 'filter': save options and unshift archive elsif( $config{'process'} =~ /\bmove\b/i || $config{'process'} =~ /\bfilter\b/i ) { ## save either move or filter my @tmp = (); push @tmp, shift @opts; ## check for filter if( join(',', @opts) =~ /\bfilter\b/i ) { push @tmp, shift @opts; } ## add archive unshift @opts, 'archive'; ## restore any move and filter options unshift @opts, @tmp; } ## no 'move' or 'filter' found: tack 'archive' to the front of the options else { unshift @opts, 'archive'; } ## rebuild process options $config{'process'} = join(',', @opts); } ## fixup apache directives if( $config{'apachelog'} ) { $config{'apachelog'} = qr/^\s*(?:$config{'apachelog'})\s+(\S+)/io; } if( @{$config{'apachelogexclude'}} ) { $config{'apachelogexclude'} = join('|', @{$config{'apachelogexclude'}} ); $config{'apachelogexclude'} = qr($config{'apachelogexclude'})o if $config{'apachelogexclude'}; ## if one empty element is passed, this ## will make sure we don't create an empty ## regex (which matches everything) } ## fixup count; quietly decrement it to DWIM if( defined $config{'count'} ) { ## decrement $config{count} to no lower than zero. Zero always ## keeps one log. $config{'count'} = ( $config{'count'} > 0 ? $config{'count'} - 1 : 0 ); } ## find which which debug( LEVEL1, "DEF: Using '$WHICH' for which" ); ## set the separator $SEP = ( defined $config{'sep'} ? $config{'sep'} : '.' ); debug( LEVEL1, "DEF: Separator set to '$SEP'" ); ## set the gtar binary $GTAR = $config{'gtar'} = find_binary($config{'gtar'}, 'gtar') or do { write_log( LEVEL2, "Could not find a suitable archive binary (e.g., gtar)" ); }; ## set the tar binary we'd like to use $TAR = $config{'tar'} = find_binary($config{'gtar'}, $config{'tar'}, 'gtar', 'tar') or do { write_log( LEVEL2, "Could not find a suitable archive binary (e.g., tar)" ); }; debug( LEVEL1, "DEF: Using '$TAR' for archiving" ); ## set the gzip or compress binary we want to use $ZIP = $config{'zip'} = find_binary($config{'gzip'}, $config{'compress'}, 'gzip', 'compress') or do { write_log( LEVEL2, "Could not find a suitable compression binary (e.g., gzip)" ); }; debug( LEVEL1, "DEF: Using '$ZIP' for file compression" ); ## set gzip binary $GZIP = $config{'gzip'} = find_binary($config{'gzip'}, 'gzip') or do { write_log( LEVEL2, "Could not find a suitable gzip binary." ); }; debug( LEVEL1, "DEF: Using '$GZIP' for gzip binary" ); ## set compress binary $COMPRESS = $config{'compress'} = find_binary($config{'compress'}, 'compress') or do { write_log( LEVEL2, "Could not find a suitable compress binary." ); }; debug( LEVEL1, "DEF: Using '$COMPRESS' for compress binary" ); ## find an uncompress binary $UNCOMPRESS = $config{'uncompress'} = find_binary($config{'uncompress'}, 'uncompress') or do { write_log( LEVEL2, "Could not find a suitable 'uncompress' binary." ); }; debug( LEVEL1, "DEF: Using '$UNCOMPRESS' for uncompress binary" ); } ## DEFAULTS ## print settings if( $opt{'settings'} ) { for my $key ( keys %config ) { my $value = ( defined $config{$key} ? $config{$key} : 'undef' ); ## settings specified if( ref $value eq 'ARRAY' ) { printf( "SET: %-18s => ( ", $key ); my $values = join( ', ', @$value ); print "$values )\n"; } else { printf( "SET: %-18s => %s\n", $key, $value ); } } exit 0; } ## -- chdir home -- ## chdir( $config{'home'} ) or do { write_log( LEVEL0, "Fatal: Could not chdir to '" . $config{'home'} . "': $!\n" ); exit; }; write_log( LEVEL3, "Changed directory to '" . $config{'home'} . "'" ); ## -- phase 0: fetch logs to process -- ## FETCH_LOGS: { my @loggies = (); ## list of log files my %log_inodes = (); ## uniqify log file list ## fetch apachehost entries in blocks ## these loggies are hashrefs, not actual log file paths CHUNK_HOST: { ## $config{log_chunks}->[0] = { period => 3, apachehost => [ 'foo.com', 'bar.com' ] } my @chunks = grep { $_->{apachehost} && scalar(@{$_->{apachehost}}) } @{$config{log_chunks}}; last CHUNK_HOST unless @chunks; ## multiple logs per host: access, error, rewrite, etc. my @chunk_hosts = map { @{$_->{apachehost}} } @chunks; ## (foo.com, bar.com, etc.) my @apache_logs = fetch_apache_logs( $config{'apacheconf'}, @chunk_hosts ); my $host_logs = shift @apache_logs; ## this has our host => [log, log] map my @host_logs = (); for my $chunk ( @chunks ) { ## a chunk is currently the all the directives for my $host ( @{$chunk->{apachehost}} ) { ## for each host that these settings apply to... for my $log ( @{$host_logs->{$host}} ) { ## make an entry for each of this host's logs my %h_log = %$chunk; ## copying all the settings of the chunk $h_log{log} = $log; ## and make an entry for the logfile itself push @host_logs, \%h_log; ## added to @loggies later } } } push @loggies, @host_logs; } ## fetch logs in blocks ## these loggies are hashrefs, not actual log file paths CHUNK_LOGS: { ## $config{log_chunks}->[0] = { period => 3, log => [ '/var/log/foo', '/var/log/bar' ] } my @chunks = grep { $_->{log} && scalar(@{$_->{log}}) } @{$config{log_chunks}}; last CHUNK_LOGS unless @chunks; ## do regex expansion for each chunk and add those to the chunk settings for my $chunk ( @chunks ) { my @chunk_logs = regex_expand( @{$chunk->{log}} ); for my $log ( @chunk_logs ) { my %ch_log = %$chunk; $ch_log{log} = $log; push @loggies, \%ch_log; } } } ## fetch logs from apacheconf directive DO_GET_LOGS: { my @apache_logs = fetch_apache_logs( $config{'apacheconf'}, @{$config{'apachehost'}} ); shift @apache_logs; ## throw away the host => log mapping push @loggies, @apache_logs; ## fetch log directives (internal regex expansion) push @loggies, regex_expand(@{$config{'log'}}); ## now push any logs from the command-line (command-line globbing) push @loggies, @ARGV; } ## fetch nologs directives my %nolog = map { make_log_entry($_)->{'fulllog'} => 1 } regex_expand(@{$config{'nolog'}}); my %nolog_inodes = map { (lstat($_))[1] => 1 } keys %nolog; ## make log entries (our own special internal data structure) for my $log ( @loggies ) { my $log_entry; ## we have a special kind of log here, with baggage already ## set. These kind came from the chunks above. if( ref($log) ) { if( $log->{disabled} ) { $config{'gripe'} = 0; ## no complaining about disabled logs next; } $log_entry = $log; $log = $log_entry->{log}; next unless $log; } my $full_log = $config{'home'} . $log; debug( LEVEL1, "P0: Creating log entry for '$log'" ); my $inode = (lstat($full_log))[1] or do { debug( LEVEL1, "P0: Skipping '$log': Could not stat inode." ); next; }; ## skip duplicates if( $log_inodes{$inode}++ ) { write_log( LEVEL4, "Skipping duplicate log '$log'." ); next; } ## check for nolog entries if( exists $nolog_inodes{$inode} ) { write_log( LEVEL4, "Skipping '$log': matches nolog pattern or inode." ); next; } ## save this log with special path information $log_entry = make_log_entry( $log_entry ? $log_entry : $log); ## make sure log is regular file unless( -f $log_entry->{'fulllog'} ) { write_log( LEVEL4, "Skipping '$log': file does not exist." ); next; } push @logs, $log_entry; write_log( LEVEL2, "Found log '" . $log_entry->{'fulllog'} . "'" ); } write_log( LEVEL3, "Exiting fetch logs phase" ); } ## this little loop is here after the FETCH_LOGS block because if a log is ## culled because of size, we don't want to complain about it. We set ## gripe to 'no' and exit. if( scalar @logs ) { for my $log ( @logs ) { ## consider size of log if( $config{'size'} && -s $log->{'fulllog'} < $config{'size'} ) { debug( LEVEL1, "P0: size of '" . $log->{'fulllog'} . "': " . -s _ ); write_log( LEVEL4, "Skipping '" . $log->{'fulllog'} . "': log file is too small." ); undef $log; next; } } ## remove undefined log entries @logs = grep { defined } @logs; ## turn off griping if we have no logs undef $config{'gripe'} unless scalar @logs; } ## -- make sure we have logs -- ## GRIPE: { unless( scalar @logs ) { if( $config{'gripe'} ) { my $err = <<_NEED_LOGS_; You must specify one or more log files to process via the 'ApacheConf' directive, the 'ApacheHost' directive, or the 'Log' directive, or on the command-line. _NEED_LOGS_ usage( $err ); } ## don't gripe about no logs else { exit; } } } ################################# ## -- main log process loop -- ## ################################# ## -- phase 1: move logs -- ## MOVE: { if( $config{'process'} =~ /\ball\b/i || $config{'process'} =~ /\bmove\b/i ) { write_log( LEVEL4, "Entering move logs phase" ); my $default_ext = ( defined $config{'ext'} ? date_str($config{'ext'}) : date_str('today') ); for my $log ( @logs ) { next unless $log; my $period = (exists $log->{period} ? $log->{period} : ( exists $config{period} ? $config{period} : undef)); ## FIXME: the *intent* of originally setting $EXT at the ## start of the script was to make sure that all logs ## would be renamed the same at the end of the day. By ## allowing Ext and DateFmt in the blocks, we ## delay that calculation out until now, which *could* ## cause in rare cases a differing EXT from log to log. ## ## The fix for that would be to make those calculations ## outside of this loop. This bug will only be introduced ## for blocks, since the default (non-Group) way ## still works as it used to. ## no period renaming (normal) if( ! defined $period ) { my $src = $log->{'fulllog'}; my $sep = exists $log->{'sep'} ? $log->{'sep'} : $SEP; my $ext = exists $log->{'ext'} ? $log->{'ext'} : undef; $ext = ( $ext ? ( exists $log->{'datefmt'} ? date_str($ext, $log->{'datefmt'}) : date_str($ext) ) : ( exists $log->{'datefmt'} ? date_str('today', $log->{'datefmt'}) : $default_ext ) ); if( exists $log->{'hourly'} ? $log->{'hourly'} : $config{'hourly'} ) { $ext = sprintf( "%s%c", $ext, (97+(localtime(time()))[2]) ); } my $dst = $src . $sep . $ext; unless( do_rename( $src, $dst ) ) { undef $log; next; } ## update internal log information $log->{'archive'} = $log->{'newlog'} = $log->{'log'} . $sep . $ext; $log->{'archpath'} = $log->{'newlogpath'} = $log->{'logpath'}; } ## do period renaming, if needed and able else { $SEP = '.'; my $ext = '0'; ## a small data structure about this log: ## logpath: path to log ## log: name of log w/o path ## sep: separator between log and extension ## ext: extension ## comp: compression extension my $count = ( defined $log->{'period'} ? ( $log->{'period'} > 0 ? $log->{'period'} - 1 : 0 ) : ( defined $log->{'count'} ? ( $log->{'count'} > 0 ? $log->{'count'} - 1 : 0 ) : $config{'count'} ) ); my %log_arg = ( 'logpath' => $log->{'logpath'}, 'log' => $log->{'log'}, 'sep' => $SEP, 'src_ext' => undef, 'dst_ext' => $ext, 'cmp_ext' => undef, 'count' => $count, ); ## move the logs unless( period_log( \%log_arg ) ) { undef $log; next; } ## update internal log information $log->{'archive'} = $log->{'newlog'} = $log->{'log'} . $SEP . $ext; $log->{'archpath'} = $log->{'newlogpath'} = $log->{'logpath'}; } ## touch old log file if needed my $touch = ( exists $log->{'touch'} ? $log->{'touch'} : $config{'touch'} ); if( $touch ) { my $file = $log->{'fulllog'}; write_log( LEVEL4, "Touching '$file'" ); unless( $config{'dry-run'} ) { open( TOUCH, ">>$file" ) or do { write_log( LEVEL2, "Could not touch '$file': $!\n" ); next; }; close TOUCH; } write_log( LEVEL5, "'$file' touched." ); } } write_log( LEVEL3, "Exiting move logs phase" ); } } ## -- post move logs hook -- ## POSTMOVEHOOK: { ## skip hook unless needed last POSTMOVEHOOK unless $config{'postmovehook'}; write_log( LEVEL4, "Entering post-move-hook phase" ); ## make sure we have some logs (otherwise we may run something ## we didn't want to) unless( scalar(@logs) || $config{'force-pmh'}) { write_log( LEVEL2, "No logs to process. Skipping postmovehook phase" ); last POSTMOVEHOOK; } ## translate variables $config{'postmovehook'} =~ s/\$APACHE_CONF/$config{'apacheconf'}/g; $config{'postmovehook'} =~ s/\$HOME\b/$config{'home'}/g; write_log( LEVEL4, "Executing: '" . $config{'postmovehook'} . "'" ); ## execute command (backticks here) and save the output unless( $config{'dry-run'} ) { ## postmovehook contains a $LOG macro: execute postmovehook once for each log if( $config{'postmovehook'} =~ /\$LOG\b/ ) { write_log( LEVEL5, "\$LOG macro detected in postmovehook string" ); for my $log ( @logs ) { my $logfile = mkpath($log->{'newlogpath'}, $log->{'newlog'}); my $movecmd = $config{'postmovehook'}; $movecmd =~ s/\$LOG/$logfile/g; debug( LEVEL1, "P1.5: movecmd set to '$movecmd'" ); write_log( LEVEL4, "Executing move command '$movecmd' on '$logfile'" ); my @command_output = `$movecmd 2>&1`; write_log( LEVEL3, "postmovehook command returned non-zero status. Ignoring." ) if $?; write_log( LEVEL5, "Command output (CMD):" ); for my $command_line ( @command_output ) { write_log( LEVEL5, "CMD> $command_line" ); } } } ## postmovehook does not contain a $LOG macro: postmovehook executes once else { my @command_output = `$config{'postmovehook'} 2>&1`; write_log( LEVEL3, "postmovehook command returned non-zero status. Ignoring." ) if $?; write_log( LEVEL5, "Command output (CMD):" ); for my $command_line ( @command_output ) { write_log( LEVEL5, "CMD> $command_line" ); } sleep 1; } } write_log( LEVEL3, "Exiting post-move-hook phase" ); } ## -- chown/chmod fixups -- ## CHOWN: { write_log( LEVEL4, "Entering chown/chmod block" ); for my $log ( @logs ) { my $log_name = mkpath($log->{'newlogpath'}, $log->{'newlog'}); if( my $chown = (exists $log->{chown} ? $log->{chown} : $config{chown}) ) { my($uid, $gid) = split(':', $chown); $uid = ($uid !~ /^\d+$/ ? ($uid eq '' ? -1 : (defined getpwnam($uid) ? getpwnam($uid) : -1) ) : $uid ); $gid = ($gid !~ /^\d+$/ ? ($gid eq '' ? -1 : (defined getgrnam($gid) ? getgrnam($gid) : -1) ) : $gid ); write_log( LEVEL5, "Chowning '$log_name' to $uid:$gid"); chown $uid, $gid, $log_name; } if( my $chmod = (exists $log->{chmod} ? $log->{chmod} : $config{chmod}) ) { write_log( LEVEL5, "Chmoding '$log_name' to $chmod"); chmod oct($chmod), $log_name; } } } ## -- phase 2: filter logs -- ## FILTER: { if( $config{'process'} =~ /\ball\b/i || $config{'process'} =~ /\bfilter\b/i ) { write_log( LEVEL4, "Entering filter phase" ); ## check for a filter unless( $config{'filter'} ) { write_log( LEVEL3, "No filter specified. Skipping." ); last FILTER; } ## pipe the log through the filter and save the output of the ## filter as the file itself for my $log ( @logs ) { next unless $log; ## make sure we're home chdir( $config{'home'} ) or do { write_log( LEVEL1, "Could not chdir to '" . $config{'home'} . "': $!\n" ); next; ## die? }; ## set temp file my $tmp_file = mkpath($log->{'newlogpath'}, '.' . $log->{'newlog'} . $$); debug( LEVEL1, "P2: Setting \$tmp_file to '$tmp_file'" ); ## clean up if( -e $tmp_file ) { write_log( LEVEL4, "Unlinking temporary file '$tmp_file'" ); unlink $tmp_file or do { write_log( LEVEL2, "Couldn't unlink temp file '$tmp_file': $!\n" ); next; }; } ## filter that puppy my $filter = $config{'filter'}; my $logfile = mkpath($log->{'newlogpath'}, $log->{'newlog'}); $filter =~ s/\$LOG/$logfile/g; debug( LEVEL1, "P2: \$filter set to '$filter'" ); write_log( LEVEL4, "Filtering '$logfile' through '$filter'" ); unless( $config{'dry-run'} ) { ## open temp file open TMP, ">$tmp_file" or do { write_log( LEVEL1, "Couldn't open temp file '$tmp_file': $!\n" ); next; }; ## open process open( FILTER, "$filter|" ) or do { write_log( LEVEL1, "Couldn't open filter '$filter': $!\n" ); next; }; ## filter it local $_; while( ) { print TMP; } ## close process close FILTER; close TMP; ## rename tmp file write_log( LEVEL4, "Renaming filter output '$tmp_file' to '$logfile'" ); rename $tmp_file, $logfile or do { write_log( LEVEL1, "Couldn't rename '$tmp_file' to '$logfile': $!\n" ); next; }; } write_log( LEVEL5, "Filtering '$logfile' via '$filter' complete." ); } write_log( LEVEL4, "Exiting filter phase" ); } } ## -- phase 2.5: post filter hook -- ## POSTFILTERHOOK: { last POSTFILTERHOOK unless $config{'postfilterhook'}; write_log( LEVEL4, "Entering post-filter-hook phase" ); ## make sure we have some logs (otherwise we may run something we ## didn't want to) unless( scalar(@logs) || $config{'force-pfh'}) { write_log( LEVEL2, "No logs to process. Skipping postfilterhook phase" ); last POSTFILTERHOOK; } $config{'postfilterhook'} =~ s/\$APACHE_CONF/$config{'apacheconf'}/g; $config{'postfilterhook'} =~ s/\$HOME\b/$config{'$home'}/g; write_log( LEVEL4, "Executing: '" . $config{'postfilterhook'} . "'" ); ## execute command (backticks here) and save the output unless( $config{'dry-run'} ) { ## postfilterhook contains a $LOG macro: execute postfilterhook once for each log if( $config{'postfilterhook'} =~ /\$LOG\b/ ) { write_log( LEVEL5, "\$LOG macro detected in postfilterhook string" ); for my $log ( @logs ) { my $logfile = mkpath($log->{'newlogpath'}, $log->{'newlog'}); my $filtercmd = $config{'postfilterhook'}; $filtercmd =~ s/\$LOG/$logfile/g; debug( LEVEL1, "P2.5: filtercmd set to '$filtercmd'" ); write_log( LEVEL4, "Executing filter command '$filtercmd' on '$logfile'" ); my @command_output = `$filtercmd 2>&1`; write_log( LEVEL3, "postfilterhook command returned non-zero status. Ignoring." ) if $?; write_log( LEVEL5, "Command output (CMD):" ); for my $command_line ( @command_output ) { write_log( LEVEL5, "CMD> $command_line" ); } } } ## postfilterhook does not contain a $LOG macro: postfilterhook executes once else { my @command_output = `$config{'postfilterhook'} 2>&1`; write_log( LEVEL3, "postfilterhook command returned non-zero status. Ignoring." ) if $?; write_log( LEVEL5, "Command output (CMD):" ); for my $command_line ( @command_output ) { write_log( LEVEL5, "CMD> $command_line" ); } sleep 1; } } write_log( LEVEL3, "Exiting post-filter-hook phase" ); } ## -- phase 2.75: stem logs -- ## ## we create a symlink with the same filename stem of the log STEM: { last STEM unless $config{'stem'} && $config{'stemhook'}; write_log( LEVEL4, "Entering stem phase" ); ## make symbolic links for my $log ( @logs ) { next unless $log; ## determine link location and unlink it if it exists my $link = mkpath( $log->{'newlogpath'}, $log->{'log'} . $SEP . $config{'stem'} ); unlink $link if -f $link && $config{'clobber'}; ## make a hard link if( $config{'stemlink'} =~ /^hard/i ) { my $logfile = mkpath( $log->{'newlogpath'}, $log->{'newlog'} ); debug( LEVEL1, "P2.75: Linking $link -> $logfile" ); unless( $config{'dry-run'} ) { link( $logfile, $link ) or do { write_log( LEVEL1, "Skipping: Could not link $logfile to $link: $!" ); next; }; } } ## make a copy: expensive! elsif( $config{'stemlink'} =~ /^copy/i ) { my $logfile = mkpath( $log->{'newlogpath'}, $log->{'newlog'} ); debug( LEVEL1, "P2.75: copying $logfile to $link" ); unless( $config{'dry-run'} ) { copy( $logfile, $link ) or do { write_log( LEVEL1, "Skipping: Could not copy $logfile to $link: $!" ); next; }; } } ## make a symbolic link else { my $logfile = $log->{'newlog'}; debug( LEVEL1, "P2.75: Symlinking $link -> $logfile" ); unless( $config{'dry-run'} ) { symlink( $logfile, $link ) or do { write_log( LEVEL1, "Skipping: Could not symlink $logfile to $link: $!" ); next; }; } } } STEMHOOK: { write_log( LEVEL4, "Entering post-stem-hook phase" ); ## translate variables $config{'stemhook'} =~ s/\$APACHE_CONF/$config{'apacheconf'}/g; $config{'stemhook'} =~ s/\$HOME/$config{'home'}/g; write_log( LEVEL4, "Executing: '" . $config{'stemhook'} . "'" ); ## make sure we have some logs (otherwise we may run something ## we didn't want to) unless( scalar(@logs) ) { write_log( LEVEL2, "No logs to process. Skipping stemhook phase" ); last STEMHOOK; } ## execute command (backticks here) and save the output unless( $config{'dry-run'} ) { my @command_output = `$config{'stemhook'} 2>&1`; write_log( LEVEL3, "stemhook command returned non-zero status. Ignoring." ) if $?; write_log( LEVEL5, "Command output (CMD):" ); for my $command_line ( @command_output ) { write_log( LEVEL5, "CMD> $command_line" ); } sleep 1; } write_log( LEVEL3, "Exiting post-stem-hook phase" ); } ## remove the link for my $log ( @logs ) { next unless $log; my $link = mkpath($log->{'newlogpath'}, $log->{'log'} . $SEP . $config{'stem'}); ## unlink debug( LEVEL1, "P2.75: Unlinking $link" ); unless( $config{'dry-run'} ) { unlink $link or do { write_log( LEVEL1, "Could not unlink '$link': $!\n" ); next; }; } } write_log( LEVEL3, "Exiting stem phase" ); } ## -- phase 3: archive logs -- ## ARCHIVE: { if( $config{'process'} =~ /\ball\b/i || $config{'process'} =~ /\barchive\b/i ) { write_log( LEVEL4, "Entering archive phase" ); ## make sure we have a tar binary unless( $TAR ) { write_log( LEVEL1, "No tar binary found. Skipping archive phase." ); last ARCHIVE; } ## each log gets its own archive, unless $config{'archive'} ## is specified. If $config{'archive'} is given without a ## pathname, then all files in a particular directory will ## get archived together under the $config{'archive'} name ## specified. If $config{'archive'} contains a path, all files ## in this session will be archived to it for my $log ( @logs ) { my $logfile = undef; my $archive = undef; my $flags = '-rf'; next unless $log; ## make sure we're home chdir( $config{'home'} ) or do { write_log( "Fatal: Could not chdir to '" . $config{'home'} . "': $!\n" ); exit; }; ## set archive names based on config. We fix this up here ## (versus in the move phase) in case the user skips the ## archive phase (this phase). Otherwise, the compression ## phase will get the wrong name. $log->{'archive'} = ( $config{'archive'} ? basename($config{'archive'}) : $log->{'log'} . '.tar' ); debug( LEVEL1, "P3: \$log->{'archive'} set to '" . $log->{'archive'} . "'" ); ## if $config{'archive'} has path information, we take that ## path here, otherwise we use the original/moved log path if( $config{'archive'} && $config{'archive'} =~ m!/! ) { $log->{'archpath'} = dirname($config{'archive'}); debug( LEVEL1, "P3: \$log->{'archpath'} set to '" . $log->{'archpath'} . "'" ); $archive = abs_path( $config{'home'} . $log->{'archpath'} ) . '/'; debug( LEVEL1, "P3: \$archive set by user to '$archive'" ); } ## create an full-path archive. We're already in our home ## directory from which the full path will be created if( $config{'full-path'} ) { ## because we're operating from this home directory, we ## need to have the full path to the file to be archive. $logfile = mkpath($log->{'newlogpath'}, $log->{'newlog'}); debug( LEVEL1, "P3: \$logfile set to '$logfile'" ); } ## create a relative-path archive. Chdir to the directory ## where the file is located so the path stored in the archive ## will be relative to this directory else { chdir( $log->{'newlogpath'} ) or do { write_log( LEVEL1, "Could not chdir to '" . $log->{'newlogpath'} . "': $!\n" ); next; }; debug( LEVEL1, "P3: chdir to '" . $log->{'newlogpath'} . "' successful" ); ## set log name $logfile = $log->{'newlog'}; } ## the name of our humble archive we're writing to. If we're ## absolute, $archive already has the path information. If ## we're relative, $archive should be blank. $archive .= $log->{'archive'}; debug( LEVEL1, "P3: Appending '" . $log->{'archive'} . "' to \$archive => '$archive'" ); ## if the humble archive exists, append to it if( -f $archive ) { ## default } ## maybe an already gzip'ed archive? elsif( -f "$archive.gz" ) { ## decompress and set the compress flag for next phase ## ## we currently don't set the compress flag for next ## phase yet ## check gzip if( $GZIP ) { write_log( LEVEL4, "Expanding '$archive.gz' with '$GZIP' before append...\n" ); system( $GZIP, '-d', "$archive.gz" ) and do { write_log( LEVEL1, "Error expanding '$archive.gz'. Skipping." ); next; }; } else { write_log( LEVEL1, "Too timid to expand '$archive.gz'. Skipping." ); next; } } ## maybe an already compressed archive? elsif( -f "$archive.Z" ) { ## decompress and set the compress flag for next phase ## ## we currently don't set the compress flag for next ## phase yet ## check zip if( $GZIP ) { write_log( LEVEL4, "Expanding '$archive.Z' with '$GZIP' before append...\n" ); system( $GZIP, '-d', "$archive.Z" ) and do { write_log( LEVEL1, "Error expanding '$archive.Z'. Skipping." ); next; }; } ## try uncompress elsif( $UNCOMPRESS ) { write_log( LEVEL4, "Uncompressing '$archive' with '$UNCOMPRESS' before append...\n" ); system( $UNCOMPRESS, "$archive.Z" ) and do { write_log( LEVEL1, "Error uncompressing '$archive.Z'. Skipping." ); next; }; } else { write_log( LEVEL1, "Too timid to expand '$archive.Z'. Skipping." ); next; } } ## a new archive else { $flags = '-cf'; } ## at this point we have a) no archive or b) an ## uncompressed archive. We need to see if the user ## specified 'count' and if $archive exists we need to ## make sure that adding a new file won't take us over ## quota. If it does take us over quota, we need to delete ## the oldest files (based on name). This implies that ## certain naming conventions must apply or bets are off. ## verify file count in this archive COUNT: { ## currently, a bug in GNU tar prevents --delete from ## working correctly with large tar files. We either ## need to work around it by re-creating the archive ## or upgrading tar last COUNT; if( $config{'count'} && -f $archive ) { write_log( LEVEL4, "Checking file count in $archive" ); local $_ = "$TAR -tf $archive"; my @cmd = split; open TAR, "@cmd|" or do { write_log( LEVEL1, "Error opening '$_': $! Skipping.\n" ); next; }; my @files = sort ; close TAR; chomp @files; ## if we have fewer than 'count', we're ok to add ## one more file below my $count_diff = scalar(@files) - $config{'count'}; if( $count_diff < 0 ) { last COUNT; } ## looks like we have too many files in this ## archive. We need to trim this archive by the ## number of files we're over plus one (to make ## room for the new file). $count_diff++; ## trim this many files ## figure out which are the "oldest" files my $oldest = find_oldest( $count_diff, \@files ); for my $file ( @$oldest ) { debug( LEVEL1, "Scheduling '$file' for removal from '$archive'" ); } ## delete these files from this archive if( scalar @$oldest ) { unless( $config{'dry-run'} ) { system( $TAR, '--delete', '-f', $archive, @$oldest ) or do { write_log( "Error deleting (@$oldest) from '$archive'. Skipping." ); next; }; } } ## couldn't ascertain which files to remove. ## Archive the file anyway so we don't lose it. else { ## whatever last COUNT; } } } write_log( LEVEL4, "Appending '$logfile' to '$archive'." ); unless( $config{'dry-run'} ) { if( system( $TAR, $flags, $archive, $logfile ) ) { write_log( LEVEL1, "Error writing '$logfile' to '$archive'. Skipping." ); next; } } ## FIXME: insert double-check code here: read tar contents, ## check integrity of tar file, etc. write_log( LEVEL5, "'$logfile' append to '$archive' complete" ); ## save the archive name we really used $log->{'archive'} = basename($archive); } write_log( LEVEL3, "Exiting archive phase" ); } } ## -- phase 4: compress archives -- ## COMPRESS: { if( $config{'process'} =~ /\ball\b/i || $config{'process'} =~ /\bcompress\b/i ) { write_log( LEVEL4, "Entering compress phase" ); ## make sure we have a compression binary unless( $ZIP ) { write_log( LEVEL1, "No compression binary found. Skipping compression phase." ); last COMPRESS; } ## compress each archive we've created for my $log ( @logs ) { my $archive = undef; next unless $log; ## make sure we're home chdir( $config{'home'} ) or do { write_log( LEVEL0, "Fatal: Could not chdir to '" . $config{'home'} . "': $!\n" ); exit; }; debug( LEVEL1, "P4: \$log->{'archive'} = '" . $log->{'archive'} . "'" ); debug( LEVEL1, "P4: \$log->{'archpath'} set to '" . $log->{'archpath'} . "'" ); ## see if we have user-specified archpath information if( $log->{'archpath'} ne $log->{'newlogpath'} ) { $archive = abs_path(mkpath($config{'home'}, $log->{'archpath'})) . '/'; debug( LEVEL1, "P4: \$archive is set to '$archive'" ); } ## create a relative-path archive else { debug( LEVEL1, "P4: using relative paths" ); chdir( $log->{'newlogpath'} ) or do { write_log( LEVEL1, "Could not chdir to '" . $log->{'newlogpath'} . "': $!\n" ); next; }; debug( LEVEL1, "P4: chdir to '" . $log->{'newlogpath'} . "' successful" ); } ## the name of our humble archive we're writing to $archive .= $log->{'archive'}; debug( LEVEL1, "P4: \$archive set to '$archive'" ); ## make sure it's not already compressed. This isn't the best ## way (by filename extension) but since we're assuming we're ## the only ones who'll be horsing around with this file ## directly, we can feel somewhat comfortable with that. if( $archive =~ /\.(?:gz|Z)$/ ) { write_log( LEVEL3, "Archive '$archive' already compressed. Skipping" ); next; } ## check for already compressed file from previous pass ## (we don't update $log->{'archive'} for each file so we ## need to check this to make sure) if( !-f $archive && (-f "$archive.Z" || -f "$archive.gz") ) { write_log( LEVEL4, "$archive already compressed. Skipping." ); next; } ## make sure we've got an archive to play with unless( -f $archive ) { write_log( LEVEL1, "Error: Archive '$archive' does not exist!. Skipping" ); next; } ## at this point we know we have an archive to compress: ## either a tar file or a plain log file my @cmd = ( $ZIP ); my $clobber = ( exists $log->{clobber} ? $log->{clobber} : $config{clobber} ); push @cmd, '-f' if $clobber; push @cmd, $archive; write_log( LEVEL4, "Compressing '$archive'" ); debug( LEVEL1, "P4: Compress command: '@cmd'" ); unless( $config{'dry-run'} ) { if( system( @cmd ) ) { write_log( LEVEL1, "Error compressing archive. Make sure archive is not already compressed and that '$archive' exists" ); next; } } write_log( LEVEL5, "'$archive' compression complete." ); } write_log( LEVEL3, "Exiting compress phase" ); } } ## -- phase 5: delete logs -- ## DELETE: { if( $config{'process'} =~ /\ball\b/i || $config{'process'} =~ /\bdelete\b/i ) { write_log( LEVEL4, "Entering delete phase" ); ## delete each log or moved log for my $log ( @logs ) { next unless $log; my $logfile = mkpath( $log->{'newlogpath'}, $log->{'newlog'} ); debug( LEVEL1, "P5: \$logfile set to '$logfile'" ); ## make sure we're home chdir( $config{'home'} ) or do { write_log( LEVEL1, "Could not chdir to '" . $config{'home'} . "': $!\n" ); next; ## die? }; write_log( LEVEL4, "Deleting '$logfile'" ); unless( $config{'dry-run'} ) { unlink $logfile or do { write_log( LEVEL1, "Could not delete '$logfile': $!\n" ); next; }; } write_log( LEVEL5, "'$logfile' deletion complete." ); } write_log( LEVEL3, "Exiting delete phase" ); } } ## -- close our log -- ## write_log( LEVEL1, "Savelogs ends." ); close_log(); exit; ## make sure home directory has a trailing slash sub clean_home { for my $file ( @_ ) { next unless defined $file; next unless -d $file; ## make it an absolute path $file = abs_path($file); ## add trailing slash $file .= '/' unless $file =~ m!/$!; ## untaint while we're at it if( $file =~ m/^(.*)$/ ) { $file = $1; } } } ## make sure paths have no leading slash sub clean_path { for my $file ( @_ ) { next unless defined $file; debug( LEVEL1, "clean_path: \$file set to $file" ); ## strip leading slash(es) $file =~ s!^/!!g; ## untaint while we're at it if( $file =~ m/^(.*)$/ ) { $file = $1; } debug( LEVEL1, "clean_path: \$file cleaned to '$file'" ); } } ## write to STDOUT, unless 'logfile' or 'stderr' directives are given. sub open_log { CONFIG: { ## check for stdout if( $config{'logfile'} =~ /^stdout$/i ) { open LOG, ">&STDOUT" or do { warn "Could not log to STDOUT: $!\n"; warn "Trying STDERR instead.\n"; $config{'logfile'} = 'stderr'; redo CONFIG; }; } ## try stderr elsif( $config{'logfile'} =~ /^stderr$/i ) { open LOG, ">&STDERR" or do { warn "Could not log to STDERR: $!\n"; warn "Trying default for logging instead.\n"; $config{'logfile'} = undef; redo CONFIG; }; } ## try a log file: home should have trailing slash -OR- ## logfile leading slash elsif( $config{'logfile'} ) { my $log = $config{'home'} . $config{'logfile'}; debug( LEVEL1, "open_log: log set to '$log'" ); open LOG, ">>$log" or do { warn "Could not open '$log' for appending: $!\n"; warn "Trying STDOUT for logging instead.\n"; $config{'logfile'} = ''; redo CONFIG; }; } ## try stdout as the failover else { open LOG, ">&STDOUT" or do { warn "Could not log to STDOUT: $!\n"; die "Quitting.\n"; }; } } return 1; } sub write_log { my $level = shift; unless( defined($level) ) { warn "No log level given. No logging done.\n"; return 0; } my @args = @_; ## messages unless( scalar @args ) { warn "No log message given. No logging done.\n"; return 0; }; ## why waste our time and yours? return 1 if $level > $config{'loglevel'}; local $_; my $date = scalar localtime; for ( @args ) { chomp; print LOG "[$date] [$prognam] $_\n"; } return 1; } sub close_log { close LOG; return 1; } sub parse_config { my $config_file = shift; my $config = shift; open CONFIG, $config_file or do { warn "Could not read config file '$config_file': $!\nSkipping config file.\n"; return undef; }; local $_; while( ) { next if /^[#;]/; ## skip comments next if /^\s*$/; ## skip empty lines ## each directive needs to be treated as a separate ## config file and its settings stored into a single new ## key of %config if( m{\s*}i ) { $$config{log_chunks} ||= []; my $chunk = parse_log_chunk($config, \*CONFIG); push @{ $$config{log_chunks} }, $chunk; next; } ## some whitespace (optional), a directive, some ## whitespace, a value, some whitespace (optional) unless( m/^\s*(\S+)\s+(.+)\s*$/ ) { debug( LEVEL1, "Skipping config file line: $_" ); next; } my $directive = lc($1); my $value = $2; ## normalize boolean values ## FIXME: outstanding bug: "Period No" in a config file ## FIXME: becomes "Period => 0", which is different. my $newval = ( $value =~ /^(?:false|no|off|undef|nope|0)$/i ? 0 : $value ); $newval = ( $newval =~ /^(?:true|yes|on|defined|yup|1)$/i ? 1 : $newval ); unless( exists $$config{$directive} ) { debug( LEVEL1, "Config file directive '$directive' unmatched. Skipping." ); next; } ## set directive debug( LEVEL1, "Config: $directive => $newval" ); debug( LEVEL1, "Config: rewriting $directive directive '$value' => '$newval'" ); ## if this directive is a repeatable directive, store this ## value in an array if( 'ARRAY' eq ref $$config{$directive} ) { push @{$$config{$directive}}, $newval; } ## otherwise, store the scalar else { $$config{$directive} = $newval; } } close CONFIG; return 1; } sub parse_log_chunk { my $config = shift; my $fh = shift; my %chunk = (); local $_; while( <$fh> ) { next if /^[#;]/; ## skip comments next if /^\s*$/; ## skip empty lines return \%chunk if m{^\s*}i; next unless m/^\s*(\S+)\s+(.+)\s*$/; my $directive = lc($1); my $value = $2; ## 'disabled' is a Group-specific directive next unless exists $$config{$directive} or $directive eq 'disabled'; my $newval = ( $value =~ /^(?:false|no|off|undef|nope|0)$/i ? 0 : $value ); $newval = ( $newval =~ /^(?:true|yes|on|defined|yup|1)$/i ? 1 : $newval ); if( 'ARRAY' eq ref $$config{$directive} ) { $chunk{$directive} ||= []; push @{ $chunk{$directive} }, $newval; } else { $chunk{$directive} = $newval; } if( $directive eq 'apachehost' ) { if( scalar(@{$chunk{$directive}}) ) { $config{'have_apache_hosts'} = 1; } } } return {}; ## throw away incomplete chunks } ## read in config file, if any; then override settings with ## command-line options, if any sub parse_command_line { my $config = shift; ## now override config file options with command line options for my $key ( map { lc($_) } keys %opt ) { my $value = $opt{$key}; unless( exists $$config{$key} ) { debug( LEVEL1, "Command-line directive '$key' unmatched. Skipping." ); next; } ## set directive my $print_value = $value; if( 'ARRAY' eq ref $value ) { $print_value = join( ', ', @$value ); next unless scalar @$value; } debug( LEVEL1, "Command: $key => '$print_value'" ); $$config{$key} = $value; } return 1; } APACHE_CONF: { ## the purpose of this block is to give lexical context to %conf ## so it becomes a static variable that will survive recursion my @conf = (); my %conf = (); my $server_root; my %host_logs = (); sub fetch_apache_logs { my $httpd_conf = shift; ## should be relative to $config{home} and leading slash stripped my %hosts = map { $_ => 1 } @_; ## hosts to look for my @logs = (); ## make sure apachelog is set. This shouldn't be an issue because ## we force command-line and config file to have a value for this ## directive. unless( $config{'apachelog'} ) { write_log( LEVEL2, "No apachelog directive found. Skipping apache configuration file." ); return; } return unless $httpd_conf; ## find logging directives in Apache configuration file ## $config{'home'} must be prepended to $httpd_conf to create ## the "real" path to the configuration file. FIND_LOGS: { ## expand httpd_conf variable (it may be a directory or a wildcard) EXPAND_CONF: { ## directory check if( -d $config{'home'} . $httpd_conf ) { opendir DIR, $config{'home'} . $httpd_conf or do { warn "Could not open $httpd_conf: $!\n"; return; }; my @dirs = map { $httpd_conf . '/' . $_ } grep { !/^\.\.?$/ } readdir DIR; closedir DIR; for my $dir ( @dirs ) { my($host_logs, @new_logs) = fetch_apache_logs($dir, (keys %hosts ? keys %hosts : ())); push @logs, @new_logs; } last FIND_LOGS; } ## this will expand any wildcards my @glob_conf = grep { s!^$config{'home'}!! } glob $config{'home'} . $httpd_conf; if( scalar @glob_conf > 1 ) { debug( LEVEL1, "Found " . scalar @glob_conf . " config files from glob '$httpd_conf'" ); for my $glob ( @glob_conf ) { my ($host_logs, @new_logs) = fetch_apache_logs($glob, (keys %hosts ? keys %hosts : ())); push @logs, @new_logs; } last FIND_LOGS; } } ## flag this conf file (or directory or glob) as "seen" so we ## don't process it again INO: { debug( LEVEL1, "Checking '$httpd_conf' inode for duplicate processing" ); ## stat will give us the target of any symlinks my $ino = (stat($config{'home'} . $httpd_conf))[1]; unless( $ino ) { write_log( LEVEL0, "Skipping '$httpd_conf': Could not stat: $!\n" ); return; } ## check for previous try if( exists $conf{$ino} ) { write_log( LEVEL2, "Skipping '$httpd_conf': already processed this session" ); return; } push @conf, $httpd_conf; ## preserve ordering $conf{$ino} = $httpd_conf; ## remember visited inodes (to avoid loops) } unless( -f $config{'home'} . $httpd_conf ) { write_log( LEVEL2, "Skipping ApacheConf file '$httpd_conf': file does not exist." ); return; } ## setup apache log regular expression debug( LEVEL1, "Log search pattern set to '$config{apachelog}' " ); ## setup apache log exclude regular expression if( $config{'apachelogexclude'} ) { debug( LEVEL1, "Exclude pattern set to '$config{apachelogexclude}'" ); } ## read apache configuration file my $HTTPD; SERVER_ROOT: { if( open $HTTPD, $config{'home'} . $httpd_conf ) { ## we allow only one ServerRoot directive last SERVER_ROOT if defined $server_root; ## get server root local $_; while( <$HTTPD> ) { next unless /^ServerRoot\s+"?([^"]+)"?\s*$/i; $server_root = $1; clean_path($server_root); $server_root .= '/' if $server_root && $server_root !~ m!/$!; debug( LEVEL1, "ServerRoot set to '$server_root'" ); last; } unless( defined $server_root ) { close $HTTPD; write_log( LEVEL0, "Skipping ApacheConf directive: No server root found in $httpd_conf.\n" ); return; } } else { write_log( LEVEL0, "Skipping ApacheConf directive: Could not open $httpd_conf: $!\n" ); return; } } ## now parse the rest of the config file for log directives my $host_state = 0; my @host_logs = (); seek $HTTPD, 0, 0; local $_; LINE: while( <$HTTPD> ) { ## skip comments next LINE if /^\#/; ## escape needed for emacs happiness ## process include directives, if desired if( $config{'apacheinclude'} && /^\s*Include\s+(.+?)\s*$/i ) { my $include = $1; ## relative paths need fixup unless( m!^/! =~ $include ) { debug( LEVEL1, "Appending '$server_root' to include file $include" ); $include = $server_root . $include; } debug( LEVEL1, "Recursively searching '$include' now..." ); my ($host_logs, @new_logs) = fetch_apache_logs($include, (keys %hosts ? keys %hosts : ())); push @logs, @new_logs; next LINE; } ## if we have ApacheHost set, we skip all non-matching ## virtualhost logs. We don't do the server's logs either. ## This is a small state machine. APACHE_HOST: { last APACHE_HOST unless $config{'have_apache_hosts'}; if( /^\s*!i ) { if( $host_state && $host_state ne 1 ) { ## we found our hostname for my $log_entry ( @host_logs ) { ## skip excluded patterns if( $config{'apachelogexclude'} ) { if( $log_entry =~ $config{'apachelogexclude'} ) { write_log( LEVEL3, "Skipping '$log_entry': Exclude pattern match." ); next LINE; } } ## found a log entry $host_logs{$host_state} ||= []; push @{$host_logs{$host_state}}, $log_entry; push @logs, $log_entry; debug( LEVEL1, "Found host '$log_entry' for '$host_state' in '$httpd_conf'" ); } } @host_logs = (); $host_state = 0; next LINE; } ## in a block if( $host_state ) { ## 1 or 2... if( /$config{'apachelog'}/ ) { push @host_logs, $1; } ## we need to tie this to the log if( $host_state eq 1 && /^\s*Server(?:Name|Alias)\s+(.*)/ ) { if( $hosts{$1} ) { # $host_state = 2; $host_state = $1; next LINE; } } } next LINE; } ## skip non-log directives. We grab the first argument after ## the log directive because CustomLog takes multiple format ## arguments after the log name. ## ## We grab all the non-whitespace characters immediately after ## the log directive. ## next LINE unless /$config{'apachelog'}/; my $log_entry = $1; ## skip excluded patterns if( $config{'apachelogexclude'} ) { if( $log_entry =~ $config{'apachelogexclude'} ) { write_log( LEVEL3, "Skipping '$log_entry': Exclude pattern match." ); next LINE; } } ## found a log entry push @logs, $log_entry; debug( LEVEL1, "Found '$log_entry' (server) in '$httpd_conf'" ); } close $HTTPD; } ## FIND_LOGS; ## if the log name begins with a /, we don't prepend $server_root for my $log ( @logs ) { unless( $log =~ m!^/! ) { unless( $log =~ m!^$server_root! ) { $log = $server_root . $log; } } } ## process the host_logs for my $host ( keys %host_logs ) { for my $log ( @{ $host_logs{$host} } ) { next if $log =~ m!^/!; next if $log =~ m!^$server_root!; $log = $server_root . $log; } } return (\%host_logs, @logs); } } ## APACHE_CONF; ## takes a log file name and returns a log entry reference ## we're careful to remove leading slashes from absolute pathnames in ## this function so that we don't try to write somewhere we can't ## later on sub make_log_entry { my $log = shift; my %entry = (); if( ref($log) ) { %entry = %$log; $log = $log->{log}; } $entry{'sequence'} = ++$SEQ_NO; ## set log paths (the name and location of the log) $entry{'log'} = basename($log); $entry{'logpath'} = dirname($log); $entry{'logpath'} =~ s!^/!!g; ## remove leading slash(es) $entry{'fulllog'} = mkpath( $entry{'logpath'}, $entry{'log'} ); ## set new log paths (what we name our log to) $entry{'newlog'} = $entry{'log'}; $entry{'newlogpath'} = $entry{'logpath'}; ## set archive paths (the name and location of the archive) ## we set the $config{'archive'} in the archiving stage because ## if the user specifies a $config{'archive'}, and we skip the ## archive stage (because it wasn't specified, then we'll get an ## error during the compression stage because of naming ## differences. $entry{'archive'} = $entry{'log'}; $entry{'archpath'} = $entry{'logpath'}; return \%entry; } sub regex_expand { my @regexen = @_; my @logs = (); for my $regex ( @regexen ) { write_log( LEVEL3, "Globbing '$regex'" ); ## return a list of globbed files with home stripped off my @globbed = grep{ s!^$config{'home'}!! } glob $config{'home'} . $regex; for my $globbed ( @globbed ) { ## skip globs if( $globbed =~ /\.(?:tar|t?gz)$/io ) { write_log( LEVEL5, "Skipped glob: '$globbed'" ); next; } write_log( LEVEL5, "Found glob: '$globbed'" ); push @logs, $globbed; } } return @logs; } sub date_str { my $date = shift; my $fmt = shift || $config{'datefmt'}; use constant DAY_SECS => 60 * 60 * 24; ## today's date if( $date eq 'today' ) { $date = time(); } ## yesterday's date elsif( $date eq 'yesterday' ) { $date = time() - DAY_SECS; } ## some other string that's not a date else { return $date; } return POSIX::strftime( $fmt, localtime($date) ); } sub find_binary { my @binaries = @_; ## check for user specified binary for my $binary ( @binaries ) { next unless $binary; ## use config if( defined $config{$binary} && -x $config{$binary} && -f $config{$binary} ) { return $config{$binary}; } ## consult 'which' program if( $WHICH ) { open CMD, "$WHICH $binary 2>/dev/null | grep -v 'no $binary' |" or do { write_log( LEVEL1, "Could not open pipe for '$binary': $!\n" ); return undef; }; my $test = ; close CMD; if( $test && length $test ) { chomp $test; return $test; } } ## find our own from PATH for my $test ( map { "$_/$binary" } split /:/, $ENV{PATH} ) { if( -x $test && -f $test ) { return $test; } } } ## bummer return ''; } sub find_which { for my $path ( map { "$_/which" } grep { defined } split /:/, $ENV{PATH} ) { return $path if -x $path; } return undef; } ## this is a recursive function. It is bounded by $config{'count'} ## (which the user may set to be very high, unfortunately). sub period_log { my $log_arg = shift; my %log = %$log_arg; ## setup source file my $src = mkpath($log{'logpath'}, $log{'log'}) . ( defined $log{'src_ext'} ? $log{'sep'} . $log{'src_ext'} : '' ) . ( defined $log{'cmp_ext'} ? $log{'sep'} . $log{'cmp_ext'} : '' ); ## setup destination file my $dst = mkpath($log{'logpath'}, $log{'log'}) . ( defined $log{'dst_ext'} ? $log{'sep'} . $log{'dst_ext'} : '' ) . ( defined $log{'cmp_ext'} ? $log{'sep'} . $log{'cmp_ext'} : '' ); RENAME: { ## $dst exists as a plain log if( -f $dst ) { if( $log{'dst_ext'} == $log{count} ) { return do_rename( $src, $dst ); } $log{'src_ext'} = $log{'dst_ext'}; $log{'dst_ext'}++; redo RENAME if period_log( \%log ); return undef; } ## $dst exists as a gzip'd log elsif( -f "$dst.gz" ) { if( $log{'dst_ext'} == $log{count} ) { return do_rename( $src, $dst ); } $log{'src_ext'} = $log{'dst_ext'}; $log{'dst_ext'}++; $log{'cmp_ext'} = 'gz'; redo RENAME if period_log( \%log ); return undef; } ## $dst exists as a compress'd log elsif( -f "$dst.Z" ) { if( $log{'dst_ext'} == $log{count} ) { return do_rename( $src, $dst ); } $log{'src_ext'} = $log{'dst_ext'}; $log{'dst_ext'}++; $log{'cmp_ext'} = 'Z'; redo RENAME if period_log( \%log ); return undef; } ## $dst does not exist in any known form else { return do_rename( $src, $dst ); } } } sub do_rename { my $src = shift; my $dst = shift; ## make sure we have a source file unless( -f $src ) { write_log( LEVEL2, "Skipping '$src': file does not exist." ); return 0; } ## rename the log write_log( LEVEL4, "Renaming '$src' to '$dst'..." ); unless( $config{'dry-run'} ) { unless( rename $src, $dst ) { write_log( LEVEL1, "Error renaming '$src' to '$dst': $!\n" ); write_log( LEVEL1, "Skipping '$src'" ); return 0; } } write_log( LEVEL5, "Rename '$src' to '$dst' complete" ); return 1; } sub do_period { my $dir = shift; my $file = shift; my @files = (); ## find files that look like ours opendir DIR, $dir or do { write_log( LEVEL0, "Could not open $dir to do period: $!\n" ); return undef; }; ## we assume (yep) that $SEP is a single character, though there's ## no rule about that. In practice it can be zero or more ## characters in length. We make sure it's a single character ## before we invoke do_period, but in practice it may be anything ## (if they're not doing period logging, for example). ## log 1-4 digits ## SEP @files = grep { /^$file.\d{1,4}/ } readdir DIR; closedir DIR; ## any matches unless( scalar @files ) { return 0; } } ## this is never called until we find a workaround for the GNU tar ## delete bug (I guess we could rewrite the archive...too lazy). sub find_oldest { my $files = shift; my $count = $config{'count'} || DEF_COUNT; my @backwards = (); ## count the number of extensions that are pure digits ## the weakness is, of course, an extension like this: foo.abc123 ## since only the 123 will get captured. ## ## The suggestion is "Don't do that." Use numeric-only extensions ## or extensions that do not end in numbers. We don't know the ## separator (for all we know, the user has mixed separators) so ## we have to go with a weaker regex. my $digits = scalar( grep { defined( (/([\d]+)$/)[0] ) } @$files ); ## good, can do a <=> sort if( $digits == scalar(@$files) ) { @backwards = map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [ $_, (/([\d]+)$/)[0] ] } @$files; } ## do a cmp sort else { @backwards = map { $_->[0] } sort { $a->[1] cmp $b->[1] } map { [ $_, (/([\da-zA-Z]+)$/)[0] ] } @$files; } my $start = $#backwards - $count; my @return = @backwards[0 .. ( $start > 0 ? $start : 0 )]; return \@return; } ## makes a path from a basename and a filename sub mkpath { # my $base = shift; # my $file = shift; # return $base . '/' . $file; return join('/', @_); } ## emits debug messages sub debug { my $level = shift; unless( defined($level) ) { warn "No debug level given. No logging done.\n"; return 0; } ## check debug level return 1 if $level =~ /^\d+$/ && $level > $config{'debug'}; local $_; my @args = @_; for ( @args ) { chomp; print STDERR "DEBUG [", $level, "] : $_\n"; } return 1; } sub usage { my $msg = shift; if( $msg ) { chomp $msg; print $msg, "\n"; print "\n"; } print <<_USAGE_; usage: $prognam [--options] [log1 .. logn] see savelogs(1) for details. _USAGE_ exit; print <<_USAGE_; usage: $prognam [--options] [log1 .. logn] General program options: --help --version --settings --dry-run --home=/path/to/home --config=/path/to/config Savelogs behavior: --process=[move][filter][archive][compress][delete] Savelogs reporting: --loglevel=n --logfile=[stdout|stderr|/path/to/log] --nogripe How savelogs finds your log files: --log=/path/to/log [--log=...] --apacheconf=/path/to/conf --apachelog=pattern --apachelogexclude[=pattern] --apacheinclude --apachehost=example.tld [--apachehost=...] How savelogs moves (renames) your files: --sep[=character] --ext[=string] --hourly --postmovehook=command --force-pmh --chown=user|uid:group|gid --chmod=nnnn (octal) --postfilterhook=command --force-pfh --stem=extension --stemhook=command --stemlink=link type --filter=command How savelogs archives your files: --tar=path/to/tar --zip=path/to/zip --archive=archive_name.tar --full-path --clobber See savelogs(1) for more information. _USAGE_ exit; } 1; __END__ =head1 NAME savelogs - save/rotate/delete log files nicely =head1 SYNOPSIS B saves your log files in a nice way (by default). savelogs --postmovehook='/usr/local/bin/restart_apache' \ --apacheconf=/www/conf/httpd.conf /var/log/messages savelogs `cat list_of_logs_to_process.txt` savelogs --loglevel=2 /var/log/maillog /var/log/cronlog \ /var/log/messages savelogs --config=/etc/savelogs.conf savelogs --period=15 /var/log/messages savelogs --apacheconf=/www/conf/httpd.conf --apachehost=foo.com =head1 DESCRIPTION B is a flexible and robust log file archival system. Its logic is simple: move (rename) the log file, filter data from the log file, store the log file in an archive (via tar or gtar), and compress the archive (via gzip or compress). After successful compression, the original log file is deleted. All of the above phases are optional. This means that you may simply delete files if you wish. Or you may simply compress existing log files. Or you may move files and add them to a tar file but leave the tar file uncompressed, etc. You pick ;o) (If you just want to cut to the chase and don't care how B works, see the L section near the bottom of this document.) =head2 Savelogs Phases The processing order may be abbreviated into these five phases: move -> filter -> archive -> compress -> delete any of which may be subtracted from the process order. In addition to these phases are some intermediate 'hooks' where you may supply an external program or shell command to invoke. This is useful if you're rotating web server log files and you need to HUP (restart) your web server after moving the logs (for example). Subtracting phases is done in one of two possible ways. The first way is to specify it in the configuration file: Process move,archive,delete which will move log files and archive them (but not filter or compress them). After successful archival, the original log files will be deleted. The second way is to specify it on the command-line: --process=compress,delete which will simply compress log files (but not move, filter, or archive them). In addition to the five phase processing options above, you may also employ the following abbreviations: =over 4 =item I<(no option specified)> If you specify no B option, the default is I. =item I Do none of the phases. This isn't a very useful option. =item I Do all of the phases. =back =head2 An Overview A typical B session might begin by typing this command: savelogs /var/log/messages After which the following phases will execute: =over 4 =item I The log file is renamed: /var/log/messages --> /var/log/messages.010523 =item I The log file is compressed /var/log/messages.010523 --> /var/log/messages.010523.gz =back 4 =head2 A Word About Paths All paths you specify to B should be relative to your home directory (if you're the root user--uid 0--your home directory is set to '/'). You do not need to use a tilde (~). You may assume that B runs from your home directory and knows how to handle absolute paths. If my real home directory were located in F and I wanted to rotate the log file located in F, I would do something like this: savelogs /var/log/messages and B would Do What I Mean. The only exception to this are external commands given to B, B and other such options. Paths you specify here really are full paths. =head1 CONFIGURATION =head2 Configuration file option format B will read its configuration options from a configuration file (which you must supply) or from the command-line. Creating a configuration file is easy: it is a simple Apache-style plaintext file that has options specified in this format: Option value where I