############################################################################## # The Faq-O-Matic is Copyright 1997 by Jon Howell, all rights reserved. # # # # This program is free software; you can redistribute it and/or # # modify it under the terms of the GNU General Public License # # as published by the Free Software Foundation; either version 2 # # of the License, or (at your option) any later version. # # # # This program is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details. # # # # You should have received a copy of the GNU General Public License # # along with this program; if not, write to the Free Software # # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.# # # # Jon Howell can be contacted at: # # 6211 Sudikoff Lab, Dartmouth College # # Hanover, NH 03755-3510 # # jonh@cs.dartmouth.edu # # # # An electronic copy of the GPL is available at: # # http://www.gnu.org/copyleft/gpl.html # # # ############################################################################## use strict; ## ## maintenance.pm ## ## This module should be invoked periodically by cron. ## It can be given an argument to run a specific task, or it will ## automatically determine which tasks to run. ## ## FAQ::OMatic::maintenance::main() (via the dispatch.pm CGI mechanism) will ## make this script do its thing. ## ## FAQ::OMatic::maintenance::invoke(host, port, url) will request the url from the host. ## Cron scripts should invoke me that way, which will cause the above ## invocation (and make maintenance do its thing). package FAQ::OMatic::maintenance; use CGI; use Socket; use FAQ::OMatic; use FAQ::OMatic::Log; use FAQ::OMatic::Auth; use FAQ::OMatic::buildSearchDB; use FAQ::OMatic::Versions; use FAQ::OMatic::ImageRef; use FAQ::OMatic::Slow; use FAQ::OMatic::I18N; my $badKeyMessage = 'Bad maintenance key.'; sub main { my $cgi = FAQ::OMatic::dispatch::cgi(); ## Demand a secret key from the caller, so that we don't have ## Joe Q. Random firing up umpteen copies of the mainenance script ## and slowing things down. With the hints that keep it from doing ## much very often, this probably doesn't matter, but anyway. if ($cgi->param('secret') ne $FAQ::OMatic::Config::maintenanceSecret) { print FAQ::OMatic::header($cgi, '-type'=>"text/plain"); print "$badKeyMessage\n"; return; } my $slow = ''; my $tasks = $cgi->param('tasks') || ''; # if ($tasks ne 'mirrorClient' # and $tasks ne 'rebuildCache') { # # (don't force out the header for Slow processes, which need # # to be able to redirect.) # hprint(FAQ::OMatic::header($cgi, '-type'=>'text/html')); # hprint("
\n"); hflush(); my %schedules = ('month'=>1,'week'=>1,'day'=>1,'hour'=>1); my @tasks = split(',', ($cgi->param('tasks')||'')); if ((@tasks == 0) or ((@tasks == 1) and $schedules{$tasks[0]})) { @tasks = periodicTasks($tasks[0] || ''); } my %taskUntaint = map {$_=>$_} ( 'writeMaintenanceHint', 'trimUHDB', 'trimSubmitTmps', 'buildSearchDB', 'trim', 'cookies', 'errors', 'logSummary', 'rebuildAllSummaries', 'rebuildCache', 'expireBags', 'bagAllImages', 'mirrorClient', 'trimSlowOutput', 'emptyTrash', 'fsck'); #hprint("
\n");
foreach my $i (sort @tasks) {
$i =~ s/\d+ //;
if (defined $taskUntaint{$i}) {
$i = $taskUntaint{$i};
hprint("--- $i\n");
hflush();
if (not eval "$i(); return 1;") {
FAQ::OMatic::gripe('problem',
"*** Task $i failed\n Error: $@\n");
hprint("*** Task $i failed\n Error: $@\n");
hprint(FAQ::OMatic::stackTrace('html'));
hflush();
}
hflush();
} else {
hprint("*** Task $i undefined\n");
hflush();
}
}
# output results
hprint("\n\n");
# provide a link to the install page, just for kicks
FAQ::OMatic::getParams($cgi);
hprint(FAQ::OMatic::button(
FAQ::OMatic::makeAref('install', {}, ''),
gettext("Go To Install/Configuration Page")));
hflush();
}
sub hSetFilehandle {
my $filehandle = shift;
FAQ::OMatic::setLocal('maintenance-fh', $filehandle);
}
sub hprint {
# I have no idea why I thought this should be a separate
# function. I think because I wanted to have the output appear
# as it was generated, but didn't want to have 'print' calls to
# weed out if I ever changed how I did printing.
my $html = FAQ::OMatic::getLocal('maintenance-html') || '';
$html .= join('', @_);
FAQ::OMatic::setLocal('maintenance-html', $html);
# we don't flush until explicitly asked to. That way, if a routine
# decides to Slow::split, it can get its redirect out before we
# send out a header.
}
sub hflush {
my $html = FAQ::OMatic::getLocal('maintenance-html') || '';
my $filehandle = FAQ::OMatic::getLocal('maintenance-fh') || \*STDOUT;
print $filehandle $html;
FAQ::OMatic::flush($filehandle);
FAQ::OMatic::setLocal('maintenance-html', '');
}
sub periodicTasks {
my $arg = shift || '';
my $lastMaintenance = readMaintenanceHint();
my @thenTime = localtime($lastMaintenance);
my @nowTime = localtime($^T);
my $newYear = ($thenTime[5] != $nowTime[5])?1:0;
my $newMonth = (!defined($thenTime[4])
or !defined($nowTime[4])
or ($thenTime[4] != $nowTime[4])
or $newYear
or ($arg eq 'month'))
?1:0;
# a tricky case:
my $newWeek = (($thenTime[6] > $nowTime[6])
# it's now "earlier" in the week than then
or ($lastMaintenance<($^T-(86400*7)))
or ($arg eq 'week')) ? 1 : 0;
# or it's at least a week later
my $newDay = (($thenTime[3] != $nowTime[3]) or $newMonth
or ($arg eq 'day'))?1:0;
my $newHour = (($thenTime[2] != $nowTime[2]) or $newDay
or ($arg eq 'hour'))?1:0;
my @tasks = ();
# The number in front of the task is the sort order
push @tasks, (
'10 buildSearchDB'
) if $newHour;
push @tasks, (
'40 mirrorClient', # if we're a mirror, this will do an update.
'50 cookies',
'60 logSummary',
'80 trimUHDB', # turns on a flag so trim() will trim uhdbs
'81 trimSubmitTmps',# turns on a flag so trim() will trim submitTmps
'82 trimSlowOutput',# turns on a flag so trim() will trim slow-outputs
'89 trim', # traverse metadir (needed for trim() to do anything)
'90 fsck',
'91 emptyTrash'
) if $newDay;
push @tasks, (
'55 errors'
) if $newWeek;
push @tasks, '98 writeMaintenanceHint';
hprint('Executing schedules:'
.($newHour ? ' Hourly' :'')
.($newDay ? ' Daily' :'')
.($newWeek ? ' Weekly' :'')
."\n\n");
my %tasks = map { ($_,1) } @tasks;
return keys %tasks;
}
# sub runScript {
# my $script = shift;
# $html.= " Executing $script...\n";
# if (system("$script")) {
# $html.= " ...failed because $!\n";
# }
# }
sub readMaintenanceHint {
my $lastMaintenance;
if (open LMHINT, "$FAQ::OMatic::Config::metaDir/lastMaintenance") {
Querying master site for item and bag modification times.
\n");
hflush();
my $limit = -1; # Set to a small number for debugging, so you don't
# have to wait for the whole mirror to complete.
# Set to -1 for normal operation.
require FAQ::OMatic::install;
# cheesily parse the URL. This seemed better than use'ing the
# whole URL.pm kit. I had bad luck with it once. Maybe I'm irrational.
my ($host, $port, $path) =
$url =~ m#http://([^/:]+)(:\d+)?/(.*)$#;
if (defined $port) {
$port =~ s/^://;
} else {
$port = 80;
}
$path = "/$path?cmd=mirrorServer";
my @reply = invoke($host, $port, $path);
# chew HTTP headers until a blank line
while (not $reply[0] =~ m/^[\r\n]*$/) {
shift @reply;
}
shift @reply; # chew off that blank line
@reply = grep { not m/^#/ } @reply; # chew off comment lines
@reply = map { chomp $_; $_ } @reply; # chomp LFs
# first line of remaining content must be version number
my $version = shift @reply;
if ($version ne 'version 1.0') {
die "This FAQ-O-Matic version $FAQ::OMatic::VERSION only understands "
."mirrorServer data version 1.0; received $version.";
}
#hprint join("\n", @reply);
my @itemURL = grep { m/^itemURL\s/ } @reply;
my $itemURL = ($itemURL[0] =~ m/^itemURL\s+(.*)$/)[0];
if (not defined $itemURL) {
die "master didn't send itemURL line.";
}
my @configs = grep { m/^config\s/ } @reply;
my $config;
my $map = FAQ::OMatic::install::readConfig();
hprint("configs supplied: ".scalar(@configs)."\n");
foreach $config (@configs) {
my ($left,$right) =
($config =~ m/config (\$\S+) = (.+)$/);
if (defined $left and defined $right) {
# unless the config line is buggy, as can happen when
# the mirrorServer FAQ is running 2.618 :v(, update our config
$map->{$left} = $right;
hprint("
$left => $right\n");
}
}
FAQ::OMatic::install::writeConfig($map);
# now make sure that config takes effect for all the cache
# files we're about to write
FAQ::OMatic::install::rereadConfig();
hflush();
my $count = 0;
my @items = grep { m/^item\s/ } @reply;
my $line;
foreach $line (@items) {
my ($file,$lms) =
($line =~ m/item\s+(\S+)\s+(\S+)/);
if (not defined $file || $file eq '') {
hprint("
Can't parse: $line\n");
next;
}
my $item = new FAQ::OMatic::Item($file);
my $existingLms = $item->{'LastModifiedSecs'} || -1;
if ($lms != $existingLms) {
hprint("
$file ".$item->getTitle().": item needs update\n");
mirrorItem($host, $port, $itemURL."$file", $file, '');
$count++; # each net access counts 1, whether or not it takes
} else {
# hprint "
Already have: $file ".$item->getTitle()."\n";
# Benign output supressed so you can see which items you
# don't have.
}
if ($limit>=0 && $count >= $limit) {
hprint("
stopping because count = limit ($limit)\n");
return;
}
# flush output
hflush();
}
my @bagsURL = grep { m/^bagsURL\s/ } @reply;
my $bagsURL = ($bagsURL[0] =~ m/^bagsURL\s+(.*)$/)[0];
if (not defined $bagsURL) {
die "master didn't send bagsURL line.";
}
my @bags = grep { m/^bag\s/ } @reply;
foreach $line (@bags) {
my ($bagword,$file,$lms) = split(/\s+/, $line);
if (not defined($bagword)
|| ($bagword ne 'bag')
|| not defined($file)) {
hprint("bad line: $line");
#continue; LEFT OFF
}
if ((not defined($lms)) || ($lms eq '')) {
# some old mirrorServer forgot to send a date --- assume the
# file is recently modified.
$lms = time();
}
$file = FAQ::OMatic::Bags::untaintBagName($file);
if ($file eq '') {
hprint("
Tainted bag name in '$line'\n");
next;
}
my $descFile = $file.".desc";
my $descItem = new FAQ::OMatic::Item($descFile,
$FAQ::OMatic::Config::bagsDir);
my $existingLms = $descItem->{'LastModifiedSecs'} || -1;
if ($lms != $existingLms) {
hprint("
${file}: bag needs update\n");
# transfer bag byte-for-byte to my bags dir
mirrorBag($host, $port, $bagsURL."$file", $file);
# transfer the .desc file, using same item mirroring code as above
mirrorItem($host, $port, $bagsURL.$descFile,
$descFile, $FAQ::OMatic::Config::bagsDir);
# update the link in any items that point to this bag
FAQ::OMatic::Bags::updateDependents($file);
$count += 2;
} else {
# hprint "
Already have: $file\n";
}
if ($limit>=0 && $count >= $limit) {
hprint("
stopping because count = limit ($limit)\n");
return;
}
# flush output
hflush();
}
}
# a close relative of previous function invoke().
sub mirrorItem {
my $host = shift;
my $port = shift;
my $path = shift;
my $itemFilename = shift;
my $itemDir = shift;
my $proto = getprotobyname('tcp');
my $sin = sockaddr_in($port, inet_aton($host));
socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto);
my $httpsock = \*HTTPSOCK;
if (not connect($httpsock, $sin)) {
die "mirrorItem can't connect(): $!, $@!\n"
}
my $request = "GET $path HTTP/1.0\nHost: ${host}";
# THANKS to Stefan Stidl emptyTrash: Done\n");
}
sub indent {
my $amount = shift;
return ("."x($amount*3))." ";
}
sub emptyTrashVisitChildren {
my $parentItem = shift;
my $childList = shift;
my $indent = shift;
hprint(indent($indent)."Visit children (".scalar(@$childList).")\n");
foreach my $childName (@$childList) {
#hprint(indent($indent+1)."Visit children ($childName)\n");
my $childItem = new FAQ::OMatic::Item($childName);
if ($childItem->isBroken()) {
# can't see into child item. Detach, and let fsck find
# and destroy te item file.
hprint(indent($indent)."detaching broken child $childName\n");
$parentItem->removeSubItem($childName, 'deferUpdate');
# deferred update okay because caller will
# explicitly save this guy
} else {
emptyTrashVisitItem($childName, $childItem, $indent);
}
hflush();
}
}
sub emptyTrashVisitItem {
my $itemName = shift;
my $item = shift;
my $indent = shift;
my $trashExpirationDays = $FAQ::OMatic::Config::trashTime || 0;
my $trashExpirationSeconds = $trashExpirationDays*24*60*60;
my $filename = $item->{'filename'} || '[no filename]';
my $title = $item->getTitle() || '[no title]';
hprint(indent($indent)."Examining trash node $filename ($itemName) named $title\n");
# Get (rid of) the children;
# if this isn't a category, it'll just get the
# empty list, and that's perfect.
my @children = $item->getChildren();
if (scalar(@children)>0) {
emptyTrashVisitChildren($item, \@children, $indent+1);
}
# if there aren't any children now (either there weren't before, or
# we actually blasted them all), then consider blasting this guy, too.
my $blasted = 0;
@children = $item->getChildren();
if (scalar(@children)<=0) {
# found a leaf
hprint(indent($indent)."Found a leaf ".$item->getTitle()."\n");
my $lastModified = $item->{'LastModifiedSecs'} || 0;
my $age = time() - $lastModified;
if ($age > $trashExpirationSeconds) {
hprint(indent($indent)." ... and it's old. Should delete.\n");
my $filename = $item->{'filename'};
my $rc = $item->destroyItem('deferUpdate');
# deferred update is okay, because we're going to
# save this guy's parent anyway when we return from
# the recursion (if the parent isn't blasted.)
if ($rc) {
hprint(indent($indent)."removed $filename; rc = $rc");
$blasted = 1;
} else {
hprint(indent($indent)."couldn't remove $filename; rc = $rc");
}
} else {
hprint(indent($indent).sprintf("'-but it's not old. (%d sec)\n", $age));
}
}
if (!$blasted) {
# make sure changes to directory stick.
hprint(indent($indent).scalar(@children)." children remain. This item survives. Saving changed directory\n");
$item->saveToFile();
}
}
sub fsck {
my $reportFreq = 100;
# check for:
# links to broken items -> replace link with text describing broken link
# items claimed by multiple parents -> rewrite as links in n-1 parents
# roots other than 1, trash -> connect to lost+found
hprint("Pass 1: detect broken files\n");
hflush();
my $count = 0;
foreach my $filei (FAQ::OMatic::getAllItemNames()) {
my $item = new FAQ::OMatic::Item($filei);
# if ($filei eq '1117') {
# hprint("1117: broken=".($item->isBroken()?'t':'f')
# ." emptyStub=".($item->isEmptyStub()?'t':'f')
# ."\n");
# }
if ($item->isBroken() and not $item->isEmptyStub()) {
# file is broken. disconnect it from parent; delete file
fsckReport("found broken item file $filei; destroying");
FAQ::OMatic::Item::destroyItemRaw($filei);
}
if (((++$count)%$reportFreq)==0) {
hprint("checked $count items; finished $filei\n");
}
hflush();
}
my $lostAndFoundItem = undef;
hprint("Pass 2: Detect incorrect ownership claims, disconnected subtrees\n");
hflush();
$count = 0;
foreach my $filei (FAQ::OMatic::getAllItemNames()) {
my $item = new FAQ::OMatic::Item($filei);
if ($item->isBroken() and not $item->isEmptyStub()) {
FAQ::OMatic::gripe('abort', "pass 1 failed; can't proceed to pass 2.");
}
my @children = $item->getChildren();
my $changedChildren = 0;
foreach my $childName (@children) {
my $childItem = new FAQ::OMatic::Item($childName);
if ($childItem->isBroken()) {
fsckReport("$filei owns broken child $childName; detaching");
$item->removeSubItem($childName, 'deferUpdate');
$changedChildren = 1;
} elsif ($childItem->{'Parent'} ne $filei) {
fsckReport("$filei claims to own $childName, but ${childName}'s parent is ".$childItem->{'Parent'}."; detaching");
$item->removeSubItem($childName, 'deferUpdate');
$changedChildren = 1;
}
}
if ($changedChildren) {
$item->saveToFile();
}
hflush();
my $parent = $item->getParent();
if ($filei eq '1' || $filei eq 'trash' || $filei eq 'help000') {
# ignore these; they're okay as roots.
} elsif ($parent->isBroken() or ($parent == $item)) {
fsckReport("$filei is a root, but shouldn't be");
$lostAndFoundItem = getLostAndFoundItem($lostAndFoundItem);
$lostAndFoundItem->addSubItem($filei);
$item->setProperty('Parent', $lostAndFoundItem->{'filename'});
$item->saveToFile();
$lostAndFoundItem->saveToFile();
}
if (((++$count)%$reportFreq)==0) {
hprint("checked $count items; finished $filei\n");
}
hflush();
}
hprint("fsck complete\n");
hflush();
}
sub fsckReport {
my $msg = shift;
hprint("$msg\n");
FAQ::OMatic::gripe('note', $msg);
}
sub getLostAndFoundItem {
my $lostAndFoundItem = shift;
if (defined($lostAndFoundItem)) {
return $lostAndFoundItem;
}
my $top = new FAQ::OMatic::Item('1');
foreach my $childName ($top->getChildren) {
my $childItem = new FAQ::OMatic::Item($childName);
if (($childItem->{'Title'}||'') eq 'lost+found') {
return $childItem;
}
}
# have to create one.
$lostAndFoundItem = new FAQ::OMatic::Item();
$lostAndFoundItem->setProperty('Parent', '1');
$lostAndFoundItem->setProperty('Title', 'lost+found');
$lostAndFoundItem->saveToFile(FAQ::OMatic::unallocatedItemName('1'));
$top->addSubItem($lostAndFoundItem->{'filename'});
$top->saveToFile();
return $lostAndFoundItem;
}
1;
Request '$request' for $itemFilename from "
."$host:$port failed: ($statusNum) $httpstatus\n");
close($httpsock);
return;
}
while (defined($_=<$httpsock>)) { # blow past HTTP headers
last if ($_ =~ m/^[\r\n]*$/);
}
my $item = new FAQ::OMatic::Item();
$item->{'filename'} = $itemFilename;
$item->loadFromFileHandle($httpsock);
close($httpsock);
$item->{'titleChanged'} = 'true';
# since we just mirrored this guy, the title may very well
# have changed, so we need to be sure to rewrite dependent items.
$item->saveToFile($itemFilename, $itemDir, 'noChange', 'updateAllDeps',
'noRecomputeDependencies');
# notice we're passing in a filename we got from that
# web server -- an insidious master might try to pass
# off bogus item filenames with '..'s in them. But saveToFile()
# has a taint-check to prevent that sort of thing.
# 'noChange' keeps lastModified date intact, so we won't keep
# re-mirroring this item.
# 'updateAllDependencies' is necessary, because otherwise
# saveToFile only adds those dependencies that are "new" to
# this item -- but we only have the item, not the .dep file,
# so we need to always regenerate all deps.
# 'noRecomputeDependencies' prevents Item.pm from trying to
# resolve forward references to nonexistent items. For example, an
# item can have a parent that hasn't been reached yet in the
# mirroring.
hprint("
$itemFilename (".$item->getTitle()."): "
."item mirrored successfully\n");
}
sub mirrorBag {
my $host = shift;
my $port = shift;
my $path = shift;
my $bagFilename = shift; # already untainted by caller
my $proto = getprotobyname('tcp');
my $sin = sockaddr_in($port, inet_aton($host));
socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto);
my $httpsock = \*HTTPSOCK; # filehandles are such a nasty legacy in Perl
if (not connect($httpsock, $sin)) {
die "mirrorBag can't connect(): $!, $@!\n"
}
my $request = "GET $path HTTP/1.0\nHost: ${host}";
print $httpsock "$request\n\n";
FAQ::OMatic::flush($httpsock);
# Thanks to Miro Jurisic
Request '$request' for $bagFilename from "
."$host:$port failed: ($statusNum) $httpstatus\n");
close($httpsock);
return;
}
while (defined($_=<$httpsock>)) { # blow past HTTP headers
last if ($_ =~ m/^[\r\n]*$/);
}
# input looks good at this point -- open output bag file
if (not open(BAGFILE, ">".$FAQ::OMatic::Config::bagsDir.$bagFilename)) {
hprint("
open of $bagFilename failed: $!\n");
close $httpsock;
return;
}
my $sizeBytes = 0;
my $buf;
while (read($httpsock, $buf, 4096)) {
print BAGFILE $buf;
$sizeBytes += length($buf);
# TODO: maybe have (mirror-site-admin)-configurable length limit here
}
close(BAGFILE);
close($httpsock);
if (not chmod(0644, $FAQ::OMatic::Config::bagsDir.$bagFilename)) {
FAQ::OMatic::gripe('problem', "chmod("
.$FAQ::OMatic::Config::bagsDir.$bagFilename
." failed: $!");
}
if ($sizeBytes == 0) {
hprint("
Uh oh, I read no bytes for $bagFilename.\n");
return;
}
hprint("
${bagFilename}: bag mirrored successfully\n");
}
sub mtime {
my $filename = shift;
return (stat($filename))[9] || 0;
}
sub emptyTrash {
my $slow = shift || '';
if ($slow) {
my $fh = FAQ::OMatic::Slow::split();
hSetFilehandle($fh);
# from now on, our output goes into the slow-output file
# to be periodically loaded by the browser.
}
my $trashExpirationDays = $FAQ::OMatic::Config::trashTime || 0;
if ($trashExpirationDays == 0) {
hprint("\$trashTime says to never take out the trash.
\n");
return;
}
my $trashItem = new FAQ::OMatic::Item('trash');
if ($trashItem->isBroken()) {
FAQ::OMatic::gripe('problem', gettext('Crud, the trash can is broken.'));
return;
}
# walk the trash tree looking for old trash
hprint("
At top level:\n");
my @children = $trashItem->getChildren();
emptyTrashVisitChildren($trashItem, \@children, 1);
$trashItem->saveToFile();
hprint("