#!/usr/bin/perl
package CPAN::CachingProxy;
use strict;
use Carp;
use URI;
use Cache::File;
use Data::Dumper;
use LWP::UserAgent;
use Fcntl qw(:flock);
use Digest::SHA1 qw(sha1_hex);
our $VERSION = "1.6500";
# wget -O MIRRORED.BY http://www.cpan.org/MIRRORED.BY
# new {{{
sub new {
my $class = shift;
my $this = bless {@_}, $class;
unless( $this->{cgi} ) {
require CGI or die $@;
$this->{cgi} = new CGI;
}
unless( $this->{cache_object} ) {
$this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root};
$this->{cache_root} = "/tmp/ccp/" unless exists $this->{cache_root};
$this->{default_expire} = "2 day" unless exists $this->{default_expire};
$this->{index_expire} = "3 hour" unless exists $this->{index_expire};
$this->{error_expire} = "15 minute" unless exists $this->{error_expire};
$this->{url_lockfile_dir} = "/tmp" unless exists $this->{url_lockfile_dir};
$this->{index_regexp} = qr/(?:03modlist\.data|02packages\.details\.txt|01mailrc\.txt)/ unless exists $this->{index_regexp};
$this->{cache_object} = Cache::File->new(cache_root=>$this->{cache_root}, default_expires => $this->{default_expire} );
}
$this->{key_space} = "CK" unless $this->{key_space};
unless( $this->{ua} ) {
my $ua = $this->{ua} = new LWP::UserAgent;
$ua->agent($this->{agent} ? $this->{agent} : "CCP/$VERSION (Paul's CPAN caching proxy / perlmonks-id=16186)");
if( exists $this->{activity_timeout} ) {
if( defined (my $at = $this->{activity_timeout}) ) {
$ua->timeout($at);
}
} else {
$ua->timeout(12);
}
}
$this->{ua}->timeout( $this->{activity_timeout} ) if defined $this->{activity_timeout};
croak "there are no default mirrors, they must be set" unless $this->{mirrors};
return $this;
}
# }}}
# run {{{
sub run {
my $this = shift;
my $cgi = $this->{cgi};
my $mirror = $this->{mirrors}[ rand @{$this->{mirrors}} ];
my $pinfo = $cgi->path_info || return print $cgi->redirect( $cgi->url . "/" );
$pinfo =~ s/^\///;
$mirror=~ s/\/$//;
my $CK = "$this->{key_space}:$pinfo";
my $URL = "$mirror/$pinfo";
# $URL =~ s/\/{2,}/\//g;
if( $pinfo =~ s{^___/}{} ) {
# NOTE: undocumented special case. If the path begins with ___, it
# probably came from a 404 handler. in which case, the real pinfo was
# probably an absolute url. replace the entire path portion of our
# mirror url with the non ___'d part of the pinfo.
my $nurl = URI->new($mirror);
$nurl->path($pinfo);
# arguably we should use URI for all our path manips, but this section is new and the old stuff works fine
$URL = "$nurl";
}
my $lockfile_fh;
my $uld = $this->{url_lockfile_dir};
if( $uld and -d $uld ) {
for(glob("$uld/.CP_FILE*")) {
open my $tlf, "<", $_ or next;
next unless flock $tlf, (LOCK_NB|LOCK_EX);
warn "[DEBUG] unlinking old URL-lockfile $_\n" if $this->{debug};
unlink $_;
}
# NOTE: sha1 is not for security, as I think timing attacks on this
# have little value really, aside from DoS, and then the local
# attackers probably have better things to do. These are simply here
# because my /tmp is tmpfs, which has surprisingly low file name length
# restrictions.
my $converted = join("/", $uld, ".CP_FILE_" . sha1_hex($URL));
warn "[DEBUG] locking $URL using $converted lockfile\n" if $this->{debug};
open $lockfile_fh, ">", $converted or die "error opening lockfile for $URL: $!";
flock $lockfile_fh, LOCK_EX or die "failed to lock lockfile for $URL: $!";
}
else {
die "as of version 1.6, url_locking_dir is a required option.";
}
my $cache = $this->{cache_object};
if( $cache->exists($CK) and $cache->exists("$CK.hdr") ) { our $VAR1;
my $res = eval $cache->get( "$CK.hdr" ); die "problem finding cache entry\n" if $@;
unless( $this->{ignore_last_modified} ) {
if( my $lm = $res->header('last_modified') ) {
my $_lm = eval { $this->{ua}->head($URL)->header('last_modified') };
# $lm = "hehe, random failure time" if (int rand(7)) == 0;
if( $_lm and $lm ne $_lm ) {
warn "[DEBUG] last_modified differs ($lm vs $_lm), forcing cache miss\n" if $this->{debug};
goto FORCE_CACHE_MISS;
}
}
}
my $start = $this->my_copy_hdr($res, "cache hit");
# XXX: is it the right thing to do to close the lockfile here?
# Probably. At this point, we should have the whole file, and we sure
# don't mind serving similtaneous requests, right?
close $lockfile_fh;
###
my $fh = $cache->handle( $CK, "<" ) or die "problem finding cache entry\n";
my $buf;
BUF: while( read $fh, $buf, 4096 ) {
if( $start > 0 ) {
if( $start > length $buf ) {
$start -= length $buf;
next BUF;
} else {
substr $buf, 0, $start, "";
$start = 0;
}
}
print $buf;
}
close $fh;
} else {
FORCE_CACHE_MISS:
my $expire = $this->{default_expire};
$expire = $this->{index_expire} if $pinfo =~ $this->{index_regexp};
$cache->set($CK, 1, $expire ); # doesn't seem like we should have to do this, but apparently we do
warn "[DEBUG] getting $URL\n" if $this->{debug};
my $fh = $cache->handle( $CK, ">", $expire );
my $request = HTTP::Request->new(GET => $URL);
my $announced_header;
my $response = $this->{ua}->request($request, sub {
my $chunk = shift;
unless( $announced_header ) {
my $res = shift;
$announced_header = 1;
$this->my_copy_hdr($res, "cache miss");
}
print $fh $chunk;
print $chunk;
});
close $fh;
unless( $response->is_success ) {
my $my_fail = "FAIL: " . $response->status_line . "\n";
$cache->set($CK => $my_fail, $expire);
$response->header(content_length=>length $my_fail); # fix content length so we don't lie to clients
$this->my_copy_hdr($response, "cache miss [fail]");
print $my_fail;
}
warn "[DEBUG] setting $CK\n" if $this->{debug};
$cache->set("$CK.hdr", Dumper($response), $expire);
# if there was an error (which we don't know until ex post facto), go back and fix the expiry
if( defined $this->{error_expire} and not $response->is_success ) {
$cache->set_expiry( $CK => $this->{error_expire} );
$cache->set_expiry( "$CK.hdr" => $this->{error_expire} );
}
}
}
# }}}
# {{{ sub my_copy_hdr
sub my_copy_hdr {
my ($this, $res, $hit) = @_;
my $cgi = $this->{cgi};
my $status = $res->status_line;
warn "[DEBUG] cache status: $hit; status: $status\n" if $this->{debug};
my %more_headers = (qw(accept_ranges bytes));
for(qw(content_length), $this->{ignore_last_modified} ? ():(qw(last_modified))) {
my $v = $res->header($_);
if( $v ) {
my $k = lc $_;
$k =~ s/-/_/g;
$more_headers{$k} = $v;
}
}
my $start = 0;
if( my $r = $cgi->http("Range") ) {
if( ($start) = $r =~ m/^bytes=(\d+)-$/ ) {
my $len = $more_headers{content_length};
my $new = $len - $start;
my $end = $len - 1; # this is the byte number, not a number of bytes or something
$more_headers{content_range} = "bytes $start-$end/$len";
$more_headers{content_length} = $new;
}
}
print $cgi->header(-status=>$status, -charset=>"", -type=>$res->header( 'content-type' ), %more_headers);
return $start;
}
# }}}