The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl

# This example illustrates how to include worker code inline with the main
# script. See comments marked INLINE for necessary tricks to make that work.

### Worker code ###

# INLINE: since this file will be compiled as a module in the worker, the
# worker code must go first

package Getter;

use strict;
use warnings;

use LWP::UserAgent;

use base 'Gearman::WorkerSpawner::BaseWorker';
use fields 'ua';

sub new {
    my Getter $self = fields::new(shift);
    $self->SUPER::new(@_);
    $self->{ua} = LWP::UserAgent->new;
    $self->register_method(get_url => \&get_url);
    return $self;
}

sub get_url {
    my Getter $self = shift;
    my $url = shift;
    $self->{config}{verbose} && warn "Requesting $url\n";
    return $self->{ua}->get($url);
}

package main;

# INLINE: if there's a caller() at top scope than this file was "use"d by
# WorkerSpawner to load the above package source, therefore bail now
return 1 if caller;

### Invoker code ###

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;

GetOptions(
    'workers=i' => \(my $workers = 10),
    'verbose!'  => \(my $verbose),
    'help!'     => \(my $help),
) || pod2usage(1);
pod2usage(1) if $help;
pod2usage('No URLS provided') unless @ARGV;

# INLINE: Gearman::WorkerSpawner needs to be loaded after the caller() check
# above or worker subprocesses get unhappy when $0 is modified by
# WorkerSpawner; therefore use require instead of use
require Gearman::WorkerSpawner;

# start up some GET workers
my $spawner = Gearman::WorkerSpawner->new;
$spawner->add_worker(
    class         => 'Getter',
    caller_source => 1, # INLINE: this is required
    num_workers   => $workers,
    config        => { verbose => $verbose },
);
$spawner->wait_until_all_ready;

my %urls; # results will come back asynchronously so keep track of which have finished
sub mark_done {
    my $url = shift;
    $urls{$url}--;
    delete $urls{$url} if $urls{$url} <= 0;
    if (!%urls) {
        $verbose && warn "Finished\n";
        exit;
    }
}

for my $url (@ARGV) {
    $urls{$url}++;
    $verbose && warn "Adding job for $url\n";
    $spawner->run_method(get_url => $url, {
        on_complete => sub {
            my $response = shift;
            if ($response->is_success) {
                printf "ok  %3d %s\n", $response->code, substr $url, 0, 100;
            }
            else {
                printf "err %3d\n", $response->code;
            }
            mark_done($url);
        },
        on_fail => sub {
            mark_done($url);
        },
    });
}

$verbose && warn "Starting loop\n";
Danga::Socket->EventLoop();

__END__

=head1 NAME

parallel-get.pl - fetches URLs in parallel

=head1 SYNOPSIS

  perl parallel-get.pl [--workers 100] <url1> <url2> ...

=head1 DESCRIPTION

This script reads URLs from the comand line and dispatches Gearman workers to
do parallel HTTP GET requests on them.

=head1 OPTIONS

=over 4

=item --workers <num>

Number of workers to start with.

=item --[no-]verbose

Show verbose output. Default off.

=item --help

Show this help.

=back

=cut