############################################################################## # The Faq-O-Matic is Copyright 1997 by Jon Howell, all rights reserved. # # # # This program is free software; you can redistribute it and/or # # modify it under the terms of the GNU General Public License # # as published by the Free Software Foundation; either version 2 # # of the License, or (at your option) any later version. # # # # This program is distributed in the hope that it will be useful, # # but WITHOUT ANY WARRANTY; without even the implied warranty of # # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # # GNU General Public License for more details. # # # # You should have received a copy of the GNU General Public License # # along with this program; if not, write to the Free Software # # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.# # # # Jon Howell can be contacted at: # # 6211 Sudikoff Lab, Dartmouth College # # Hanover, NH 03755-3510 # # jonh@cs.dartmouth.edu # # # # An electronic copy of the GPL is available at: # # http://www.gnu.org/copyleft/gpl.html # # # ############################################################################## use strict; ## ## maintenance.pm ## ## This module should be invoked periodically by cron. ## It can be given an argument to run a specific task, or it will ## automatically determine which tasks to run. ## ## FAQ::OMatic::maintenance::main() (via the dispatch.pm CGI mechanism) will ## make this script do its thing. ## ## FAQ::OMatic::maintenance::invoke(host, port, url) will request the url from the host. ## Cron scripts should invoke me that way, which will cause the above ## invocation (and make maintenance do its thing). package FAQ::OMatic::maintenance; use CGI; use Socket; use FAQ::OMatic; use FAQ::OMatic::Log; use FAQ::OMatic::Auth; use FAQ::OMatic::buildSearchDB; use FAQ::OMatic::Versions; use FAQ::OMatic::ImageRef; use FAQ::OMatic::Slow; use FAQ::OMatic::I18N; my $badKeyMessage = 'Bad maintenance key.'; sub main { my $cgi = FAQ::OMatic::dispatch::cgi(); ## Demand a secret key from the caller, so that we don't have ## Joe Q. Random firing up umpteen copies of the mainenance script ## and slowing things down. With the hints that keep it from doing ## much very often, this probably doesn't matter, but anyway. if ($cgi->param('secret') ne $FAQ::OMatic::Config::maintenanceSecret) { print FAQ::OMatic::header($cgi, '-type'=>"text/plain"); print "$badKeyMessage\n"; return; } my $slow = ''; my $tasks = $cgi->param('tasks') || ''; # if ($tasks ne 'mirrorClient' # and $tasks ne 'rebuildCache') { # # (don't force out the header for Slow processes, which need # # to be able to redirect.) # hprint(FAQ::OMatic::header($cgi, '-type'=>'text/html')); # hprint("FAQ-O-Matic Maintenance\n"); # hflush(); # # just in case some other junk sneaks out on the fd # } else { # $slow = '-slow'; # tell task to run interactively # } # do everything in "slow" mode -- fork a process (so the web # server won't kill it), and redirect all output to a Slow file. my $fh = FAQ::OMatic::Slow::split(); hSetFilehandle($fh); hprint("This is process ID ".$$."

\n"); hflush(); my %schedules = ('month'=>1,'week'=>1,'day'=>1,'hour'=>1); my @tasks = split(',', ($cgi->param('tasks')||'')); if ((@tasks == 0) or ((@tasks == 1) and $schedules{$tasks[0]})) { @tasks = periodicTasks($tasks[0] || ''); } my %taskUntaint = map {$_=>$_} ( 'writeMaintenanceHint', 'trimUHDB', 'trimSubmitTmps', 'buildSearchDB', 'trim', 'cookies', 'errors', 'logSummary', 'rebuildAllSummaries', 'rebuildCache', 'expireBags', 'bagAllImages', 'mirrorClient', 'trimSlowOutput', 'emptyTrash', 'fsck'); #hprint("

\n");
	foreach my $i (sort @tasks) {
		$i =~ s/\d+ //;
		if (defined $taskUntaint{$i}) {
			$i = $taskUntaint{$i};
			hprint("--- $i\n");
			hflush();
			if (not eval "$i(); return 1;") {
				FAQ::OMatic::gripe('problem',
					"*** Task $i failed\n    Error: $@\n");
				hprint("*** Task $i failed\n    Error: $@\n");
				hprint(FAQ::OMatic::stackTrace('html'));
				hflush();
			}
			hflush();
		} else {
			hprint("*** Task $i undefined\n");
			hflush();
		}
	}

	# output results
	hprint("\n
\n"); # provide a link to the install page, just for kicks FAQ::OMatic::getParams($cgi); hprint(FAQ::OMatic::button( FAQ::OMatic::makeAref('install', {}, ''), gettext("Go To Install/Configuration Page"))); hflush(); } sub hSetFilehandle { my $filehandle = shift; FAQ::OMatic::setLocal('maintenance-fh', $filehandle); } sub hprint { # I have no idea why I thought this should be a separate # function. I think because I wanted to have the output appear # as it was generated, but didn't want to have 'print' calls to # weed out if I ever changed how I did printing. my $html = FAQ::OMatic::getLocal('maintenance-html') || ''; $html .= join('', @_); FAQ::OMatic::setLocal('maintenance-html', $html); # we don't flush until explicitly asked to. That way, if a routine # decides to Slow::split, it can get its redirect out before we # send out a header. } sub hflush { my $html = FAQ::OMatic::getLocal('maintenance-html') || ''; my $filehandle = FAQ::OMatic::getLocal('maintenance-fh') || \*STDOUT; print $filehandle $html; FAQ::OMatic::flush($filehandle); FAQ::OMatic::setLocal('maintenance-html', ''); } sub periodicTasks { my $arg = shift || ''; my $lastMaintenance = readMaintenanceHint(); my @thenTime = localtime($lastMaintenance); my @nowTime = localtime($^T); my $newYear = ($thenTime[5] != $nowTime[5])?1:0; my $newMonth = (!defined($thenTime[4]) or !defined($nowTime[4]) or ($thenTime[4] != $nowTime[4]) or $newYear or ($arg eq 'month')) ?1:0; # a tricky case: my $newWeek = (($thenTime[6] > $nowTime[6]) # it's now "earlier" in the week than then or ($lastMaintenance<($^T-(86400*7))) or ($arg eq 'week')) ? 1 : 0; # or it's at least a week later my $newDay = (($thenTime[3] != $nowTime[3]) or $newMonth or ($arg eq 'day'))?1:0; my $newHour = (($thenTime[2] != $nowTime[2]) or $newDay or ($arg eq 'hour'))?1:0; my @tasks = (); # The number in front of the task is the sort order push @tasks, ( '10 buildSearchDB' ) if $newHour; push @tasks, ( '40 mirrorClient', # if we're a mirror, this will do an update. '50 cookies', '60 logSummary', '80 trimUHDB', # turns on a flag so trim() will trim uhdbs '81 trimSubmitTmps',# turns on a flag so trim() will trim submitTmps '82 trimSlowOutput',# turns on a flag so trim() will trim slow-outputs '89 trim', # traverse metadir (needed for trim() to do anything) '90 fsck', '91 emptyTrash' ) if $newDay; push @tasks, ( '55 errors' ) if $newWeek; push @tasks, '98 writeMaintenanceHint'; hprint('Executing schedules:' .($newHour ? ' Hourly' :'') .($newDay ? ' Daily' :'') .($newWeek ? ' Weekly' :'') ."\n\n"); my %tasks = map { ($_,1) } @tasks; return keys %tasks; } # sub runScript { # my $script = shift; # $html.= " Executing $script...\n"; # if (system("$script")) { # $html.= " ...failed because $!\n"; # } # } sub readMaintenanceHint { my $lastMaintenance; if (open LMHINT, "$FAQ::OMatic::Config::metaDir/lastMaintenance") { =~ m/^(\d+)/; $lastMaintenance = defined($1) ? int($1) : 0; # THANKS Michael Gerdts for the defined() test close LMHINT; } else { $lastMaintenance = 0; } return $lastMaintenance; } ############################################################################ ######## Task definitions ################################################ ############################################################################ sub writeMaintenanceHint { if (open LMHINT, ">$FAQ::OMatic::Config::metaDir/lastMaintenance") { print LMHINT $^T." ".scalar localtime($^T)."\n"; close LMHINT; } FAQ::OMatic::Versions::setVersion('MaintenanceInvoked'); } sub trimUHDB { my $trimset = FAQ::OMatic::getLocal('trimset') || {}; $trimset->{'uhdb'} = 1; FAQ::OMatic::setLocal('trimset', $trimset); } sub trimSubmitTmps { my $trimset = FAQ::OMatic::getLocal('trimset') || {}; $trimset->{'submitTmp'} = 1; FAQ::OMatic::setLocal('trimset', $trimset); } sub trimSlowOutput { my $trimset = FAQ::OMatic::getLocal('trimset') || {}; $trimset->{'slow'} = 1; FAQ::OMatic::setLocal('trimset', $trimset); } sub buildSearchDB { my $cgi = FAQ::OMatic::dispatch::cgi(); my $force = $cgi->param('force') || ''; # FORCE feature requested by # "Dameon D. Welch-Abernathy" if ($force or (not -f "$FAQ::OMatic::Config::metaDir/freshSearchDBHint") or (-M "$FAQ::OMatic::Config::metaDir/freshSearchDBHint" > 1/24)) { FAQ::OMatic::buildSearchDB::build(); } else { hprint(" (not needed)\n"); } } sub rebuildAllSummaries { FAQ::OMatic::Log::rebuildAllSummaries(); } sub trim { if (not opendir NEWLOGDIR, $FAQ::OMatic::Config::metaDir) { hprint("*** Couldn't scan $FAQ::OMatic::Config::metaDir."); return; } my $trimset = FAQ::OMatic::getLocal('trimset') || {}; hprint("trimming: ".join(' ', sort keys %{$trimset})."\n"); my $daybefore = FAQ::OMatic::Log::adddays(FAQ::OMatic::Log::numericToday(), -2); while (defined(my $file = readdir(NEWLOGDIR))) { # untaint file -- we should be able to trust the operating system # to provide only reasonable filenames from a readdir(). $file =~ m/^(.*)$/; $file = $1; # uhdb's (unique host databases, part of the access log) if ($trimset->{'uhdb'} and ($file =~ m/^[\d-]+.uhdb./)) { my @dates = ($file =~ m/^([\d-]+)/); my $date = $dates[0]; # Delete all uhdb files before yesterday to save # space. we save the day-before's log, since # we need it if we summarize yesterday's log (which # is another task on the maintenance agenda). # (uhdb = Unique Host Database files. Since no # "new" unique hosts will be joining logs from the # past, having the files around doesn't help # anymore.) # Notice we do these tests based on the name of the # file, not the mod date, since it could have been # generated today by a regenerateSmrys. if ($date lt $daybefore) { hprint("removing $file\n"); unlink "$FAQ::OMatic::Config::metaDir/$file"; } } # submitTmp if ($trimset->{'submitTmp'} and ($file =~ m/^submitTmp\./)) { # only trim files older than a day if (-M "$FAQ::OMatic::Config::metaDir/$file" > 1.0) { hprint("removing $file\n"); unlink "$FAQ::OMatic::Config::metaDir/$file"; } } # slow if ($trimset->{'slow'} and ($file =~ m/^slow-output\./)) { # only trim files older than a day if (-M "$FAQ::OMatic::Config::metaDir/$file" > 1.0) { hprint("removing $file\n"); unlink "$FAQ::OMatic::Config::metaDir/$file"; } } } close NEWLOGDIR; } sub cookies { # throw away old cookies if (not open COOKIES, "$FAQ::OMatic::Config::metaDir/cookies") { hprint("no cookie file.\n"); return; } my $cookiesNewFile = "$FAQ::OMatic::Config::metaDir/cookies.new"; if (not open NEWCOOKIES, ">$cookiesNewFile") { hprint("can't create $FAQ::OMatic::Config::metaDir/cookies.new.\n"); return; } while (defined($_=)) { my ($cookie,$id,$time) = split(' '); if ($time +($FAQ::OMatic::Config::cookieLife||3600) +$FAQ::OMatic::Auth::cookieExtra > $^T) { # TODO: that 3600 default should be encoded somewhere, # rather than being a magic number. It also appears in Auth.pm. print NEWCOOKIES $_; } } close COOKIES; close NEWCOOKIES; if (not chmod(0600, "$cookiesNewFile")) { FAQ::OMatic::gripe('problem', "chmod failed on $cookiesNewFile"); } unlink "$FAQ::OMatic::Config::metaDir/cookies"; if (not rename("$FAQ::OMatic::Config::metaDir/cookies.new","$FAQ::OMatic::Config::metaDir/cookies")) { hprint ("Couldn't rename $FAQ::OMatic::Config::metaDir/cookies.new " ."to $FAQ::OMatic::Config::metaDir/cookies\n" ."because $!\n"); } } sub errors { if (not $FAQ::OMatic::Config::maintSendErrors) { hprint(" Config says don't mail errors.\n"); return; } my $size = (-s "$FAQ::OMatic::Config::metaDir/errors") || 0; if ($size == 0) { hprint(" No ($size) errors to mail.\n"); return; } my $msg = "Errors from ".FAQ::OMatic::fomTitle() ." (v. $FAQ::OMatic::VERSION):\n\n"; if (not open(ERRORS, "$FAQ::OMatic::Config::metaDir/errors")) { hprint(" Couldn't read $FAQ::OMatic::Config::metaDir/errors " ."because $!; not truncating\n"); } else { hprint(" Sending errors...\n"); my @errs = ; close ERRORS; FAQ::OMatic::sendEmail($FAQ::OMatic::Config::adminEmail, "Faq-O-Matic Error Log", $msg.join('', @errs)); # truncate errorfile open ERRORS, ">$FAQ::OMatic::Config::metaDir/errors"; close ERRORS; } } sub logSummary { my $yesterday = FAQ::OMatic::Log::adddays(FAQ::OMatic::Log::numericToday(), -1); FAQ::OMatic::Log::summarizeDay($yesterday); } sub cronInvoke { # special version to be called from cron to alert user to errors my @reply = invoke(@_); my $reply = join('', @reply); # if the cron job fails to run, we should print a message out, # so that the user knows to fix it. (most crons mail the message # to the user.) my ($proto,$code,@rest) = split(' ', $reply[0]); # THANKS to Andrew W. Nosenko # for this patch to ignore 'Moved Temporarily' response. # TODO: I'm not sure 302 guarantees that it actually worked, # but it's something. Why do we return the 'slow' URL to cron? # That seems buggy. if ($code != '200' && $code != '302') { print "Unable to invoke maintenance task; HTTP reply follows:\n\n"; print $reply; } if ($reply =~ m/$badKeyMessage/) { print "$badKeyMessage\n"; print "Admin must access the install page and\n" ."click \"Set up the maintenance cron job.\"\n"; } } sub invoke { my $host = shift; my $port = shift; my $url = shift; my $verbose = shift; my $proto = getprotobyname('tcp'); socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto); my $httpsock = \*HTTPSOCK; # filehandles are such a nasty legacy in Perl my $sin = sockaddr_in($port, inet_aton($host)); if (not connect(HTTPSOCK, $sin)) { die "maintenance::invoke can't connect(): $!, $@!\n" } print $httpsock "GET $url HTTP/1.0\nHost: ${host}\n\n"; # Thanks to Gabor Melis for the "Host:" # header, which makes virtually hosted sites work right. FAQ::OMatic::flush($httpsock); # Thanks to Miro Jurisic for this fix. my @reply = <$httpsock>; close $httpsock; if ($verbose) { hprint(join('', @reply)); } return @reply if wantarray(); } # We used to have a verifyCache task that rebuilt cached files if # they were older than their item file (or the config file). # The former condition wasn't very useful, since caches are rewritten # whenever items are. The latter shouldn't really have happened # automatically, because it can be verrrry slow on a big FAQ. # And in neither case could you refresh *every* item in the cache. # The new task simply reads and writes every item in the item/ directory, # which ensures that its cache and dependencies are up-to-date. # (note also the 'updateAllDependencies' flag to saveToFile().) # sub rebuildCache { my $slow = shift || ''; return if ((not defined $FAQ::OMatic::Config::cacheDir) or ($FAQ::OMatic::Config::cacheDir eq '')); if ($slow) { my $fh = FAQ::OMatic::Slow::split(); hSetFilehandle($fh); # from now on, our output goes into the slow-output file # to be periodically loaded by the browser. } my $itemName; foreach $itemName (FAQ::OMatic::getAllItemNames()) { hprint( "
Updating $itemName\n"); my $item = new FAQ::OMatic::Item($itemName); if ($item->isEmptyStub()) { # skip stubs next; } if (not eval { $item->saveToFile('', '', 'noChange', 'updateAllDependencies'); 1; }) { FAQ::OMatic::gripe('note', "save failed for ".$itemName); } # flush stdout hflush(); } FAQ::OMatic::Versions::setVersion('CacheRebuild'); } sub expireBags { return if ((not defined $FAQ::OMatic::Config::cacheDir) or ($FAQ::OMatic::Config::cacheDir eq '') or (not defined $FAQ::OMatic::Config::bagsDir) or ($FAQ::OMatic::Config::bagsDir eq '')); my $anyMessages = 0; my @bagList = grep { not m/\.desc$/ } FAQ::OMatic::getAllItemNames($FAQ::OMatic::Config::bagsDir); my $bagName; foreach $bagName (@bagList) { #hprint("
Checking $bagName\n"); my @dependents = FAQ::OMatic::Item::getDependencies("bags.".$bagName); if (scalar(@dependents) == 0) { # don't declare system-supplied bags invalid # THANKS: to John Goerzen for pointing this out my ($prefix) = ($bagName =~ m/^(.*)\.[^\.]+$/); # THANKS: Edwin Chiu for fixing an # uninitialized value warning coming from here. next if (defined($prefix) && FAQ::OMatic::ImageRef::validImage($prefix)); if (not $anyMessages) { hprint("The following suggestion(s) are based on " ."dependency files.\n You might run rebuildCache first " ."if you want to be certain that they are valid.\n\n"); } $anyMessages = 1; my $msg = "Consider removing bag $bagName; it is not linked from " ."any item in the FAQ."; hprint "
$msg\n"; # TODO: in a future version, we could actually just unlink # TODO: the unreferenced bags. (garbage collection). # The comment above about rebuildCache would apply. # Dependencies should stay current, but I'd sure hate # to accidentally blast your bag. } } } sub bagAllImages { return if ((not defined $FAQ::OMatic::Config::bagsDir) or ($FAQ::OMatic::Config::bagsDir eq '')); require FAQ::OMatic::ImageRef; FAQ::OMatic::ImageRef::bagAllImages(); FAQ::OMatic::Versions::setVersion('SystemBags'); } sub mirrorClient { my $slow = shift || ''; my $url = $FAQ::OMatic::Config::mirrorURL; if ((not defined $url) or ($url eq '')) { hprint("mirrorClient() exiting silently because this is " ."not a mirror.\n"); return; } if ($slow) { my $fh = FAQ::OMatic::Slow::split(); hSetFilehandle($fh); # from now on, our output goes into the slow-output file # to be periodically loaded by the browser. } hprint("

Querying master site for item and bag modification times.

\n"); hflush(); my $limit = -1; # Set to a small number for debugging, so you don't # have to wait for the whole mirror to complete. # Set to -1 for normal operation. require FAQ::OMatic::install; # cheesily parse the URL. This seemed better than use'ing the # whole URL.pm kit. I had bad luck with it once. Maybe I'm irrational. my ($host, $port, $path) = $url =~ m#http://([^/:]+)(:\d+)?/(.*)$#; if (defined $port) { $port =~ s/^://; } else { $port = 80; } $path = "/$path?cmd=mirrorServer"; my @reply = invoke($host, $port, $path); # chew HTTP headers until a blank line while (not $reply[0] =~ m/^[\r\n]*$/) { shift @reply; } shift @reply; # chew off that blank line @reply = grep { not m/^#/ } @reply; # chew off comment lines @reply = map { chomp $_; $_ } @reply; # chomp LFs # first line of remaining content must be version number my $version = shift @reply; if ($version ne 'version 1.0') { die "This FAQ-O-Matic version $FAQ::OMatic::VERSION only understands " ."mirrorServer data version 1.0; received $version."; } #hprint join("\n", @reply); my @itemURL = grep { m/^itemURL\s/ } @reply; my $itemURL = ($itemURL[0] =~ m/^itemURL\s+(.*)$/)[0]; if (not defined $itemURL) { die "master didn't send itemURL line."; } my @configs = grep { m/^config\s/ } @reply; my $config; my $map = FAQ::OMatic::install::readConfig(); hprint("configs supplied: ".scalar(@configs)."\n"); foreach $config (@configs) { my ($left,$right) = ($config =~ m/config (\$\S+) = (.+)$/); if (defined $left and defined $right) { # unless the config line is buggy, as can happen when # the mirrorServer FAQ is running 2.618 :v(, update our config $map->{$left} = $right; hprint("
$left => $right\n"); } } FAQ::OMatic::install::writeConfig($map); # now make sure that config takes effect for all the cache # files we're about to write FAQ::OMatic::install::rereadConfig(); hflush(); my $count = 0; my @items = grep { m/^item\s/ } @reply; my $line; foreach $line (@items) { my ($file,$lms) = ($line =~ m/item\s+(\S+)\s+(\S+)/); if (not defined $file || $file eq '') { hprint("
Can't parse: $line\n"); next; } my $item = new FAQ::OMatic::Item($file); my $existingLms = $item->{'LastModifiedSecs'} || -1; if ($lms != $existingLms) { hprint("
$file ".$item->getTitle().": item needs update\n"); mirrorItem($host, $port, $itemURL."$file", $file, ''); $count++; # each net access counts 1, whether or not it takes } else { # hprint "
Already have: $file ".$item->getTitle()."\n"; # Benign output supressed so you can see which items you # don't have. } if ($limit>=0 && $count >= $limit) { hprint("

stopping because count = limit ($limit)\n"); return; } # flush output hflush(); } my @bagsURL = grep { m/^bagsURL\s/ } @reply; my $bagsURL = ($bagsURL[0] =~ m/^bagsURL\s+(.*)$/)[0]; if (not defined $bagsURL) { die "master didn't send bagsURL line."; } my @bags = grep { m/^bag\s/ } @reply; foreach $line (@bags) { my ($bagword,$file,$lms) = split(/\s+/, $line); if (not defined($bagword) || ($bagword ne 'bag') || not defined($file)) { hprint("bad line: $line"); #continue; LEFT OFF } if ((not defined($lms)) || ($lms eq '')) { # some old mirrorServer forgot to send a date --- assume the # file is recently modified. $lms = time(); } $file = FAQ::OMatic::Bags::untaintBagName($file); if ($file eq '') { hprint("
Tainted bag name in '$line'\n"); next; } my $descFile = $file.".desc"; my $descItem = new FAQ::OMatic::Item($descFile, $FAQ::OMatic::Config::bagsDir); my $existingLms = $descItem->{'LastModifiedSecs'} || -1; if ($lms != $existingLms) { hprint("
${file}: bag needs update\n"); # transfer bag byte-for-byte to my bags dir mirrorBag($host, $port, $bagsURL."$file", $file); # transfer the .desc file, using same item mirroring code as above mirrorItem($host, $port, $bagsURL.$descFile, $descFile, $FAQ::OMatic::Config::bagsDir); # update the link in any items that point to this bag FAQ::OMatic::Bags::updateDependents($file); $count += 2; } else { # hprint "
Already have: $file\n"; } if ($limit>=0 && $count >= $limit) { hprint("

stopping because count = limit ($limit)\n"); return; } # flush output hflush(); } } # a close relative of previous function invoke(). sub mirrorItem { my $host = shift; my $port = shift; my $path = shift; my $itemFilename = shift; my $itemDir = shift; my $proto = getprotobyname('tcp'); my $sin = sockaddr_in($port, inet_aton($host)); socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto); my $httpsock = \*HTTPSOCK; if (not connect($httpsock, $sin)) { die "mirrorItem can't connect(): $!, $@!\n" } my $request = "GET $path HTTP/1.0\nHost: ${host}"; # THANKS to Stefan Stidl for the # Host: header, to fix mirroring from virtual hosts print $httpsock "$request\n\n"; FAQ::OMatic::flush($httpsock); # Thanks to Miro Jurisic for this fix. my $httpstatus = <$httpsock>; # get initial result chomp $httpstatus; my ($statusNum) = ($httpstatus =~ m/\s(\d+)\s/); if ($statusNum != 200) { hprint("
Request '$request' for $itemFilename from " ."$host:$port failed: ($statusNum) $httpstatus\n"); close($httpsock); return; } while (defined($_=<$httpsock>)) { # blow past HTTP headers last if ($_ =~ m/^[\r\n]*$/); } my $item = new FAQ::OMatic::Item(); $item->{'filename'} = $itemFilename; $item->loadFromFileHandle($httpsock); close($httpsock); $item->{'titleChanged'} = 'true'; # since we just mirrored this guy, the title may very well # have changed, so we need to be sure to rewrite dependent items. $item->saveToFile($itemFilename, $itemDir, 'noChange', 'updateAllDeps', 'noRecomputeDependencies'); # notice we're passing in a filename we got from that # web server -- an insidious master might try to pass # off bogus item filenames with '..'s in them. But saveToFile() # has a taint-check to prevent that sort of thing. # 'noChange' keeps lastModified date intact, so we won't keep # re-mirroring this item. # 'updateAllDependencies' is necessary, because otherwise # saveToFile only adds those dependencies that are "new" to # this item -- but we only have the item, not the .dep file, # so we need to always regenerate all deps. # 'noRecomputeDependencies' prevents Item.pm from trying to # resolve forward references to nonexistent items. For example, an # item can have a parent that hasn't been reached yet in the # mirroring. hprint("
$itemFilename (".$item->getTitle()."): " ."item mirrored successfully\n"); } sub mirrorBag { my $host = shift; my $port = shift; my $path = shift; my $bagFilename = shift; # already untainted by caller my $proto = getprotobyname('tcp'); my $sin = sockaddr_in($port, inet_aton($host)); socket(HTTPSOCK, PF_INET, SOCK_STREAM, $proto); my $httpsock = \*HTTPSOCK; # filehandles are such a nasty legacy in Perl if (not connect($httpsock, $sin)) { die "mirrorBag can't connect(): $!, $@!\n" } my $request = "GET $path HTTP/1.0\nHost: ${host}"; print $httpsock "$request\n\n"; FAQ::OMatic::flush($httpsock); # Thanks to Miro Jurisic for this fix. my $httpstatus = <$httpsock>; # get initial result chomp $httpstatus; my ($statusNum) = ($httpstatus =~ m/\s(\d+)\s/); if ($statusNum != 200) { hprint("
Request '$request' for $bagFilename from " ."$host:$port failed: ($statusNum) $httpstatus\n"); close($httpsock); return; } while (defined($_=<$httpsock>)) { # blow past HTTP headers last if ($_ =~ m/^[\r\n]*$/); } # input looks good at this point -- open output bag file if (not open(BAGFILE, ">".$FAQ::OMatic::Config::bagsDir.$bagFilename)) { hprint("
open of $bagFilename failed: $!\n"); close $httpsock; return; } my $sizeBytes = 0; my $buf; while (read($httpsock, $buf, 4096)) { print BAGFILE $buf; $sizeBytes += length($buf); # TODO: maybe have (mirror-site-admin)-configurable length limit here } close(BAGFILE); close($httpsock); if (not chmod(0644, $FAQ::OMatic::Config::bagsDir.$bagFilename)) { FAQ::OMatic::gripe('problem', "chmod(" .$FAQ::OMatic::Config::bagsDir.$bagFilename ." failed: $!"); } if ($sizeBytes == 0) { hprint("
Uh oh, I read no bytes for $bagFilename.\n"); return; } hprint("
${bagFilename}: bag mirrored successfully\n"); } sub mtime { my $filename = shift; return (stat($filename))[9] || 0; } sub emptyTrash { my $slow = shift || ''; if ($slow) { my $fh = FAQ::OMatic::Slow::split(); hSetFilehandle($fh); # from now on, our output goes into the slow-output file # to be periodically loaded by the browser. } my $trashExpirationDays = $FAQ::OMatic::Config::trashTime || 0; if ($trashExpirationDays == 0) { hprint("\$trashTime says to never take out the trash.
\n"); return; } my $trashItem = new FAQ::OMatic::Item('trash'); if ($trashItem->isBroken()) { FAQ::OMatic::gripe('problem', gettext('Crud, the trash can is broken.')); return; } # walk the trash tree looking for old trash hprint("
At top level:\n"); my @children = $trashItem->getChildren(); emptyTrashVisitChildren($trashItem, \@children, 1); $trashItem->saveToFile(); hprint("

emptyTrash: Done\n"); } sub indent { my $amount = shift; return ("."x($amount*3))." "; } sub emptyTrashVisitChildren { my $parentItem = shift; my $childList = shift; my $indent = shift; hprint(indent($indent)."Visit children (".scalar(@$childList).")\n"); foreach my $childName (@$childList) { #hprint(indent($indent+1)."Visit children ($childName)\n"); my $childItem = new FAQ::OMatic::Item($childName); if ($childItem->isBroken()) { # can't see into child item. Detach, and let fsck find # and destroy te item file. hprint(indent($indent)."detaching broken child $childName\n"); $parentItem->removeSubItem($childName, 'deferUpdate'); # deferred update okay because caller will # explicitly save this guy } else { emptyTrashVisitItem($childName, $childItem, $indent); } hflush(); } } sub emptyTrashVisitItem { my $itemName = shift; my $item = shift; my $indent = shift; my $trashExpirationDays = $FAQ::OMatic::Config::trashTime || 0; my $trashExpirationSeconds = $trashExpirationDays*24*60*60; my $filename = $item->{'filename'} || '[no filename]'; my $title = $item->getTitle() || '[no title]'; hprint(indent($indent)."Examining trash node $filename ($itemName) named $title\n"); # Get (rid of) the children; # if this isn't a category, it'll just get the # empty list, and that's perfect. my @children = $item->getChildren(); if (scalar(@children)>0) { emptyTrashVisitChildren($item, \@children, $indent+1); } # if there aren't any children now (either there weren't before, or # we actually blasted them all), then consider blasting this guy, too. my $blasted = 0; @children = $item->getChildren(); if (scalar(@children)<=0) { # found a leaf hprint(indent($indent)."Found a leaf ".$item->getTitle()."\n"); my $lastModified = $item->{'LastModifiedSecs'} || 0; my $age = time() - $lastModified; if ($age > $trashExpirationSeconds) { hprint(indent($indent)." ... and it's old. Should delete.\n"); my $filename = $item->{'filename'}; my $rc = $item->destroyItem('deferUpdate'); # deferred update is okay, because we're going to # save this guy's parent anyway when we return from # the recursion (if the parent isn't blasted.) if ($rc) { hprint(indent($indent)."removed $filename; rc = $rc"); $blasted = 1; } else { hprint(indent($indent)."couldn't remove $filename; rc = $rc"); } } else { hprint(indent($indent).sprintf("'-but it's not old. (%d sec)\n", $age)); } } if (!$blasted) { # make sure changes to directory stick. hprint(indent($indent).scalar(@children)." children remain. This item survives. Saving changed directory\n"); $item->saveToFile(); } } sub fsck { my $reportFreq = 100; # check for: # links to broken items -> replace link with text describing broken link # items claimed by multiple parents -> rewrite as links in n-1 parents # roots other than 1, trash -> connect to lost+found hprint("Pass 1: detect broken files\n"); hflush(); my $count = 0; foreach my $filei (FAQ::OMatic::getAllItemNames()) { my $item = new FAQ::OMatic::Item($filei); # if ($filei eq '1117') { # hprint("1117: broken=".($item->isBroken()?'t':'f') # ." emptyStub=".($item->isEmptyStub()?'t':'f') # ."\n"); # } if ($item->isBroken() and not $item->isEmptyStub()) { # file is broken. disconnect it from parent; delete file fsckReport("found broken item file $filei; destroying"); FAQ::OMatic::Item::destroyItemRaw($filei); } if (((++$count)%$reportFreq)==0) { hprint("checked $count items; finished $filei\n"); } hflush(); } my $lostAndFoundItem = undef; hprint("Pass 2: Detect incorrect ownership claims, disconnected subtrees\n"); hflush(); $count = 0; foreach my $filei (FAQ::OMatic::getAllItemNames()) { my $item = new FAQ::OMatic::Item($filei); if ($item->isBroken() and not $item->isEmptyStub()) { FAQ::OMatic::gripe('abort', "pass 1 failed; can't proceed to pass 2."); } my @children = $item->getChildren(); my $changedChildren = 0; foreach my $childName (@children) { my $childItem = new FAQ::OMatic::Item($childName); if ($childItem->isBroken()) { fsckReport("$filei owns broken child $childName; detaching"); $item->removeSubItem($childName, 'deferUpdate'); $changedChildren = 1; } elsif ($childItem->{'Parent'} ne $filei) { fsckReport("$filei claims to own $childName, but ${childName}'s parent is ".$childItem->{'Parent'}."; detaching"); $item->removeSubItem($childName, 'deferUpdate'); $changedChildren = 1; } } if ($changedChildren) { $item->saveToFile(); } hflush(); my $parent = $item->getParent(); if ($filei eq '1' || $filei eq 'trash' || $filei eq 'help000') { # ignore these; they're okay as roots. } elsif ($parent->isBroken() or ($parent == $item)) { fsckReport("$filei is a root, but shouldn't be"); $lostAndFoundItem = getLostAndFoundItem($lostAndFoundItem); $lostAndFoundItem->addSubItem($filei); $item->setProperty('Parent', $lostAndFoundItem->{'filename'}); $item->saveToFile(); $lostAndFoundItem->saveToFile(); } if (((++$count)%$reportFreq)==0) { hprint("checked $count items; finished $filei\n"); } hflush(); } hprint("fsck complete\n"); hflush(); } sub fsckReport { my $msg = shift; hprint("$msg\n"); FAQ::OMatic::gripe('note', $msg); } sub getLostAndFoundItem { my $lostAndFoundItem = shift; if (defined($lostAndFoundItem)) { return $lostAndFoundItem; } my $top = new FAQ::OMatic::Item('1'); foreach my $childName ($top->getChildren) { my $childItem = new FAQ::OMatic::Item($childName); if (($childItem->{'Title'}||'') eq 'lost+found') { return $childItem; } } # have to create one. $lostAndFoundItem = new FAQ::OMatic::Item(); $lostAndFoundItem->setProperty('Parent', '1'); $lostAndFoundItem->setProperty('Title', 'lost+found'); $lostAndFoundItem->saveToFile(FAQ::OMatic::unallocatedItemName('1')); $top->addSubItem($lostAndFoundItem->{'filename'}); $top->saveToFile(); return $lostAndFoundItem; } 1;