package Plagger::Plugin::Filter::FetchEnclosure::ParallelUA;
use strict;
use warnings;
use base qw(Plagger::Plugin::Filter::FetchEnclosure);

use File::Spec;
use HTTP::Request;
use LWP::Parallel::UserAgent;
use Plagger::Cookies;

# Wire this plugin into the context's hooks:
#   plugin.init        - build the parallel user agent
#   update.entry.fixup - queue one download request per enclosure of the entry
#   update.fixup       - wait for all queued downloads and run their callbacks
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&enqueue,
        'update.fixup'       => \&fetch,
        'plugin.init'        => \&plugin_init,
    );
}

# Create the LWP::Parallel::UserAgent used for all downloads and apply the
# plugin configuration ("concurrency", default 10; "max_requests_per_host",
# default 2).  When the global "user_agent" configuration has a "cookies"
# entry, a cookie jar built from it is attached to the agent.
sub plugin_init {
    my $self = shift;

    my $ua = LWP::Parallel::UserAgent->new;
    $ua->max_hosts( $self->conf->{concurrency} || 10 );
    $ua->max_req( $self->conf->{max_requests_per_host} || 2 );

    # The user_agent section may be absent from the configuration entirely.
    my $conf = Plagger->context->conf->{user_agent};
    if ($conf && $conf->{cookies}) {
        $ua->cookie_jar( Plagger::Cookies->create($conf->{cookies}) );
    }

    $self->{ua}       = $ua;
    $self->{callback} = {};
}

# Queue a GET request for every enclosure of $args->{entry} that is not
# already stored on disk with the expected size.
#
# Files are saved as <conf dir>/<feed id_safe>/<enclosure filename>.  For each
# queued request a callback is remembered (keyed by request URL) that fetch()
# invokes with the response once the download has finished.
sub enqueue {
    my($self, $context, $args) = @_;

    my @enclosures = $args->{entry}->enclosures;
    return unless @enclosures;

    # TODO: do all of this in the base class
    # The per-feed directory is the same for every enclosure of the entry, so
    # make sure it exists once, up front.
    my $feed_dir = File::Spec->catfile($self->conf->{dir}, $args->{feed}->id_safe);
    unless (-d $feed_dir) {
        $context->log(info => "mkdir $feed_dir");
        unless (mkdir($feed_dir, 0777)) {
            my $error = $!;    # save errno before the -d test below resets it
            # Tolerate the directory having been created in the meantime.
            unless (-d $feed_dir) {
                $context->log(error => "mkdir $feed_dir failed: $error");
                return;
            }
        }
    }

    for my $enclosure (@enclosures) {
        my $path = File::Spec->catfile($feed_dir, $enclosure->filename);

        # Skip the download when a file of the advertised size is already there.
        # (-s yields undef for an empty file, hence the "|| 0".)
        if ($enclosure->length && -e $path && (-s _ || 0) == $enclosure->length) {
            # TODO: if-none-match
            $context->log(debug => $enclosure->url . " is already stored in $path");
            next;
        }

        $context->log(info => "fetch " . $enclosure->url . " to " . $path);

        my $req = HTTP::Request->new(GET => $enclosure->url);
        if ($self->conf->{fake_referer}) {
            $context->log(debug => "Sending Referer: " . $args->{entry}->permalink);
            $req->header('Referer' => $args->{entry}->permalink);
        }

        # Key the callback by the request's own URL: fetch() looks callbacks up
        # via $response->request->url, so both sides stringify the same way.
        my $url_key = '' . $req->url;

        # register() returns a response object describing the problem when the
        # request cannot be queued, and a false value otherwise.
        if (my $failure = $self->{ua}->register($req, $path)) {
            $context->log(error => "can't enqueue " . $enclosure->url . ": " . $failure->status_line);
            next;
        }

        $self->{callback}->{$url_key} = sub {
            my $response = shift;
            if ($response->code =~ /^[23]/) {
                # NOTE(review): local_path is only recorded when the server sent
                # a Content-Length header; a successful response without that
                # header leaves the enclosure untouched -- confirm this is
                # intended.
                if (my $length = $response->header('Content-Length')) {
                    $enclosure->length($length);
                    $enclosure->local_path($path);
                }
            }
            else {
                $context->log(error => "fetching " . $enclosure->url . " failed: " . $response->status_line);
            }
        };
    }
}

# Block until the parallel user agent has finished every queued request, then
# hand each response to the callback enqueue() stored for its request URL.
sub fetch {
    my($self, $context) = @_;

    $context->log(debug => "wait for responses from Parallel UA ...");
    my $entries   = $self->{ua}->wait;
    my $callbacks = $self->{callback} || {};

    for my $entry (values %{ $entries || {} }) {
        my $response = $entry->response or next;
        my $request  = $response->request or next;
        # NOTE(review): if a download was redirected, the request attached to
        # the response may carry a different URL than the one enqueue() used
        # as key -- verify against LWP::Parallel's redirect handling.
        my $cb = $callbacks->{ $request->url } or next;
        $cb->($response);
    }

    # Release the closures (they hold enclosures and the context) once used.
    $self->{callback} = {};
}

1;

__END__

=head1 NAME

Plagger::Plugin::Filter::FetchEnclosure::ParallelUA - Fetch enclosures using Parallel UA

=head1 SYNOPSIS

  - module: Filter::FetchEnclosure::ParallelUA
    config:
      dir: /path/to/download
      concurrency: 5
      max_requests_per_host: 2

=head1 DESCRIPTION

This plugin uses LWP::Parallel::UserAgent to download enclosures from
multiple hosts in parallel.

=head1 CONFIG

=over 4

=item dir

Directory to store downloaded enclosures in. Each feed gets its own
subdirectory.

=item concurrency

Maximum number of hosts to fetch from in parallel. Defaults to 10.

=item max_requests_per_host

Maximum number of parallel requests per host. Defaults to 2.

=item fake_referer

If set to a true value, the permalink of the entry is sent as the
C<Referer> header of each download request.

=back

=head1 AUTHOR

Tatsuhiko Miyagawa

=head1 SEE ALSO

L<Plagger>, L<LWP::Parallel::UserAgent>

=cut