package Apache2::S3; use strict; use warnings; use Apache2::Const -compile => qw(OK DECLINED PROXYREQ_REVERSE); use Apache2::RequestRec; use Apache2::Filter; use Apache2::FilterRec; use APR::Table; use APR::String; use MIME::Base64; use Digest::SHA1; use Digest::HMAC; use URI::Escape; use HTML::Entities; use XML::Parser; use Time::Local; use POSIX; use CGI; our $VERSION = '0.05'; our $ESCAPE = quotemeta " #%<>[\]^`{|}?\\"; use constant TEXT => '0'; sub _signature { my ($id, $key, $data) = @_; return "AWS $id:".MIME::Base64::encode_base64(Digest::HMAC::hmac($data, $key, \&Digest::SHA1::sha1), ""); } sub handler { my $r = shift; return Apache2::Const::DECLINED if $r->proxyreq; return Apache2::Const::DECLINED unless $r->method eq 'GET' or $r->dir_config('S3ReadWrite'); my $h = $r->headers_in; my $uri = $r->uri; my %map = split /\s*(?:,|=>)\s*/, $r->dir_config("S3Map"); # most specific (longest) match first foreach my $base (sort { length $b <=> length $a } keys %map) { $uri =~ s|^($base/*)|| or next; my $stripped = $1; my ($bucket, $keyId, $keySecret) = split m|/|, $map{$base}; $keyId ||= $r->dir_config("S3Key"); $keySecret ||= $r->dir_config("S3Secret"); my $is_dir = $uri =~ m,(^|/)$,; my $path = "/$bucket/".($is_dir ? "" : $uri); my $args = $r->args || ""; my $sub = $args =~ s/^(acl|logging|torrent)(?:&|$)// ? $1 : ""; local $CGI::USE_PARAM_SEMICOLONS = 0; $args = CGI->new($r, $args); if ($is_dir) { $args->param('delimiter', $args->param('delimiter') || '/'); $args->param('prefix', $uri) if $uri; } my %note = ( 'id' => $keyId, 'secret' => $keySecret, 'path' => $path, 'sub' => $sub, 'stripped' => $stripped, ($is_dir ? ('prefix' => $uri) : ()), (($args->param('raw') or not $is_dir or $sub) ? ('raw' => 1) : ()), (($args->param('nocache') or $is_dir or $sub) ? ('nocache' => 1) : ()), ); $r->notes->add(__PACKAGE__."::s3_$_" => $note{$_}) foreach keys %note; $r->proxyreq(Apache2::Const::PROXYREQ_REVERSE); $r->uri("http://s3.amazonaws.com$path"); $r->args(($sub ? "$sub&" : "").$args->query_string); $r->filename("proxy:http://s3.amazonaws.com$path"); $r->handler('proxy-server'); # we delay adding the authorization header to give # mod_auth* a chance to authenticate the users request # which would use the same header $r->set_handlers('PerlFixupHandler' => \&s3_auth_handler); # we set up an output filter to translate XML responses # for directory requests into "pretty" HTML $r->add_output_filter(\&output_filter); return Apache2::Const::OK; } return Apache2::Const::DECLINED; } sub s3_auth_handler { my $r = shift; my $h = $r->headers_in; my ($keyId, $keySecret, $path, $sub) = map $r->notes->get(__PACKAGE__."::s3_$_"), qw(id secret path sub); $h->{'Date'} = POSIX::strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime); $h->{'Authorization'} = _signature $keyId, $keySecret, join "\n", $r->method, $h->{'Content-MD5'} || "", $h->{'Content-Type'} || "", $h->{'Date'}, uri_escape($path, $ESCAPE).($sub ? "?$sub" : ""); return Apache2::Const::OK; } sub _xml_get_tags { my ($tree, $tag, @tags) = @_; my @ret; for (my $i = @$tree % 2; $i < @$tree; $i += 2) { next unless $tree->[$i] eq $tag; push @ret, $tree->[$i+1]; last unless wantarray; } return unless @ret; return _xml_get_tags($ret[0], @tags) if @tags; return wantarray ? @ret : $ret[0]; } sub _reformat_directory { my ($f, $ctx) = @_; my $stripped = $f->r->notes->get(__PACKAGE__.'::s3_stripped'); my $prefix = $f->r->notes->get(__PACKAGE__.'::s3_prefix'); my $tree = eval { XML::Parser->new(Style => 'Tree')->parse($ctx->{text}); }; my $list = _xml_get_tags($tree, 'ListBucketResult') or die $ctx->{text}; my $is_truncated = _xml_get_tags($list, 'IsTruncated', TEXT) =~ /^(?:false|)$/i ? 0 : 1; my $next_marker = _xml_get_tags($list, 'NextMarker', TEXT); my @dirs = map +{ Name => _xml_get_tags($_, 'Prefix', TEXT), }, _xml_get_tags($list, 'CommonPrefixes'); my @files = map +{ Name => _xml_get_tags($_, 'Key', TEXT), Size => _xml_get_tags($_, 'Size', TEXT), LastModified => _xml_get_tags($_, 'LastModified', TEXT) =~ /^(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(?:\.\d+)?Z$/ ? timegm($6, $5, $4, $3, $2-1, $1) : 0, }, _xml_get_tags($list, 'Contents'); my $ret = ""; $ret .= qq|
|;
$ret .= qq|Parent Directory\n|;
$ret .= qq|Next Page\n|
if $is_truncated and $next_marker;
$ret .= sprintf(qq|%s%s %-18s %s\n|,
$stripped.uri_escape($_->{Name}, $ESCAPE),
HTML::Entities::encode($_->{DisplayName}),
" "x(87 - length $_->{DisplayName}),
$_->{LastModified} ? strftime("%d-%b-%Y %H:%M", localtime($_->{LastModified})) : "-",
$_->{Size} ? APR::String::format_size($_->{Size}) : "")
foreach map {
$_->{DisplayName} = $_->{Name} =~ m|([^/]+)/?$| ? $1 : $_->{Name};
$_;
} @dirs, @files;
$ret .= qq||;
$ret;
}
sub output_filter
{
my $f = shift;
my $ctx;
unless ($ctx = $f->ctx)
{
# disable caching layer if requested
if ($f->r->notes->get(__PACKAGE__.'::s3_nocache'))
{
my $next = $f;
while ($next)
{
$next->remove if $next->frec->name =~ /^cache_\w+$/i;
$next = $next->next;
}
}
else
{
# mark as public to allow mod_cache to save it even though it includes an Authorization header
$f->r->headers_out->{'Cache-Control'} = join(",", grep defined && length,
split(/\s*,\s*/, $f->r->headers_out->{'Cache-Control'} || ""), "public");
}
# don't process this output if requested
if ($f->r->notes->get(__PACKAGE__.'::s3_raw') or lc $f->r->content_type ne 'application/xml')
{
$f->remove;
unless ($f->r->content_type eq 'application/xml')
{
# S3 supports byte-range requests, but doesn't advertise it.
$f->r->headers_out->{'Accept-Ranges'} = 'bytes';
}
return Apache2::Const::DECLINED
}
$f->r->content_type('text/html');
$f->r->headers_out->unset('Content-Length');
$f->ctx($ctx = { text => "" })
}
$ctx->{text} .= $_
while $f->read($_);
return Apache2::Const::OK
unless $f->seen_eos;
my $ret = _reformat_directory($f, $ctx);
$f->r->headers_out->{'Content-Length'} = length $ret;
$f->print($ret);
$f->ctx(undef);
return Apache2::Const::OK;
}
1;
__END__
=head1 NAME
Apache2::S3 - mod_perl library for proxying requests to amazon S3
=head1 SYNOPSIS
PerlModule Apache2::S3;
PerlTransHandler Apache2::S3
PerlSetVar S3Key foo
PerlSetVar S3Secret bar
PerlSetVar S3Map '/path/ => amazon.s3.bucket.name'
# If you want to support non-GET requests
PerlSetVar S3ReadWrite 1
=head1 DESCRIPTION
This module will map requests for URLs on your server into proxy
requests to the Amazon S3 service, adding authentication headers
along the way to permit access to non-public resources.
It doesn't actually do any proxying itself, rather it just adds
the required authentication fields to the request and sets up mod_proxy
to handle it. Therefore you will need to enable mod_proxy like so:
ProxyRequests on
If you permit modification requests (PUT/DELETE) using the
S3ReadWrite feature then it is quite important that you protect
the url from untrusted requests using something like the following
on Apache 2.2: