package WWW::Pastebin::PastebinCom::Retrieve;
use warnings;
use strict;
our $VERSION = '0.002';
# URI objects are built for the paste address in _make_uri_and_id().
use URI;
# Tokenizes the fetched paste page in _parse().
use HTML::TokeParser::Simple;
# Supplies decode_entities(), applied to every scraped field in _parse().
use HTML::Entities;
# NOTE(review): error(), uri(), id(), results(), content(), ua(),
# _set_error() and _get_was_successful() are called in this file but not
# defined here; presumably they are inherited from this base class - confirm.
use base 'WWW::Pastebin::Base::Retrieve';
# Fetch a paste given either its ID or a URI pointing to it.
#
# Arguments: the paste ID or URI (mandatory); anything after it is handed
# on untouched to _make_uri_and_id().
#
# Clears the error/uri/id/results accessors first, then records the
# resolved ID and URI, fetches the page through the object's user agent and
# returns whatever _get_was_successful() returns for the page body. On a
# missing argument or a failed request the result of _set_error() is
# returned instead.
sub retrieve {
    my ( $self, $paste, @extra ) = @_;

    # Forget everything left over from a previous call.
    for my $accessor ( qw(error uri id results) ) {
        $self->$accessor(undef);
    }

    if ( !defined $paste or !length $paste ) {
        return $self->_set_error('Missing or empty paste ID or URI');
    }

    # An empty list from the helper means "give up"; nothing is set here
    # in that case.
    my @resolved = $self->_make_uri_and_id( $paste, @extra );
    return
        unless @resolved;
    my ( $paste_uri, $paste_id ) = @resolved;

    $self->id( $paste_id );
    $self->uri( $paste_uri );

    my $response = $self->ua->get( $paste_uri );

    # and just WHY they thought giving 404s on existing pastes is such a
    # great idea? A 404 therefore still has to be treated as a page worth
    # parsing.
    unless ( $response->is_success or $response->code == 404 ) {
        return $self->_set_error('Network error: ' . $response->status_line);
    }

    return $self->_get_was_successful( $response->content );
}
# Turn user input (a bare paste ID or a pastebin.com URI) into the URI to
# fetch and the paste ID.
#
# Arguments: the ID or URI string; any further arguments are ignored.
# Returns a two-element list: a URI object and the paste ID.
#
# Recognised URI forms: optional "http://" or "https://" scheme, optional
# "www." prefix (dropped), optional "private paste" subdomain(s) (kept), then
# "pastebin.com/" and the ID. The scheme that was supplied is preserved;
# "http" is used when there is none. Input that is not recognised as a
# pastebin.com URI is used verbatim as the paste ID.
sub _make_uri_and_id {
    my ( $self, $what ) = @_;

    # The subdomain capture is limited to dotted host labels and the match is
    # anchored at the start; a catch-all capture here would swallow
    # "https://" or any other leading text and end up inside the host name
    # of the generated URI.
    my ( $scheme, $private, $id ) = $what =~ m{
        \A \s*
        (?: (https?) :// )?      # optional scheme
        (?: www\. )?
        ( (?: [\w-]+ \. )* )     # "private paste" subdomain, trailing dot included
        pastebin\.com/
        (\w+)                    # paste ID
    }xi;

    $id = $what
        unless defined $id and length $id;
    $private = ''
        unless defined $private;
    $scheme = defined $scheme ? lc $scheme : 'http';

    return ( URI->new("${scheme}://${private}pastebin.com/$id"), $id );
}
# Scrape the paste data out of the HTML of a pastebin.com paste page.
#
# Arguments: the page content as a string.
#
# On success stores the paste text through the content() accessor and
# returns a hashref holding whichever of the keys name, posted_on, lang and
# content were encountered; values that are undefined or empty are replaced
# by 'N/A', the rest are entity-decoded. On failure returns the result of
# _set_error(), with either a "does not seem to exist" or a "Parser error"
# message.
sub _parse {
    my ( $self, $content ) = @_;

    my $parser = HTML::TokeParser::Simple->new( \$content );

    # %nav is the scanner state: "level" records how far we got (reported
    # in parser errors), the other flags say which piece is expected next.
    my ( %data, %nav );
    @nav{ qw(level start get_name_date get_lang get_content) }
        = (0) x 5;

    while ( my $t = $parser->get_token ) {
        if ( $t->is_start_tag('div')
            and defined $t->get_attr('id')
            and $t->get_attr('id') eq 'content'
        ) {
            # Everything of interest lives after <div id="content">.
            @nav{ qw(level start) } = (1, 1);
        }
        elsif ( $nav{start} == 1 and $t->is_start_tag('h1') ) {
            @nav{ qw(level get_name_date) } = (2, 1);
        }
        elsif ( $nav{get_name_date} == 1 and $t->is_text ) {
            # Both fields stay undefined when the heading text does not match.
            @data{ qw(name posted_on) } = $t->as_is
                =~ /Posted by (.+) on (.+)/;
            @nav{ qw(level get_name_date) } = (3, 0);
        }
        elsif ( $nav{start} == 1
            and $t->is_start_tag('option')
            and defined $t->get_attr('selected')
        ) {
            # The selected <option> names the paste's language.
            @nav{ qw(level get_lang) } = (4, 1);
        }
        elsif ( $nav{get_lang} == 1 and $t->is_text ) {
            @nav{ qw(level get_lang) } = (5, 0);
            $data{lang} = $t->as_is;
        }
        elsif ( $nav{start} == 1
            and $t->is_start_tag('textarea')
            and defined $t->get_attr('id')
            and $t->get_attr('id') eq 'code'
        ) {
            @nav{ qw(level get_content) } = (6, 1);
        }
        elsif ( $nav{get_content} == 1 and $t->is_text ) {
            $data{content} = $t->as_is;
            $nav{is_success} = 1;
            last;
        }
        elsif ( $nav{get_content} == 1 and $t->is_end_tag('textarea') ) {
            # <textarea id="code"> closed without any text in it.
            return $self->_set_error('This paste does not seem to exist');
        }
    }

    unless ( $nav{is_success} ) {
        # yes, they could've given 200s on existing pastes and 404s on
        # non-existant, but NO!! 404s for EVERYONE... so the page text is
        # all we have to tell the two apart. The marker is consulted only
        # once no paste text was found, so a real paste that merely
        # contains this text is still returned. (An empty pattern such as
        # "!~ //" must not be used for this test: Perl would silently
        # re-apply the last successful pattern.)
        return $self->_set_error('This paste does not seem to exist')
            if $content =~ m|\n404 Not Found|;

        return $self->_set_error(
            "Parser error (level $nav{level}).\n"
            . "Failed on content:\n$content"
        );
    }

    # values() aliases the hash slots, so the fields are fixed up in place.
    for my $value ( values %data ) {
        unless ( defined $value and length $value ) {
            $value = 'N/A';
            next;
        }
        decode_entities($value);

        # \240 is the non-breaking space that &nbsp; decodes to.
        $value =~ s/\240/ /g;
    }

    $self->content( $data{content} );
    return \%data;
}
=head1 NAME
WWW::Pastebin::PastebinCom::Retrieve - retrieve pastes from http://pastebin.com/ website
=head1 SYNOPSIS
use strict;
use warnings;
use lib '../lib';
use WWW::Pastebin::PastebinCom::Retrieve;
die "Usage: perl retrieve.pl <paste_ID_or_URI>\n"
unless @ARGV;
my $Paste = shift;
my $paster = WWW::Pastebin::PastebinCom::Retrieve->new;
my $results_ref = $paster->retrieve( $Paste )
or die $paster->error;
printf "Paste content is:\n%s\nPasted by %s on %s\n",
@$results_ref{ qw(content name posted_on) };
=head1 DESCRIPTION
The module provides an interface to retrieve pastes from the
L<http://pastebin.com/> website via Perl.
=head1 CONSTRUCTOR
=head2 C<new>
my $paster = WWW::Pastebin::PastebinCom::Retrieve->new;
my $paster = WWW::Pastebin::PastebinCom::Retrieve->new(
timeout => 10,
);
my $paster = WWW::Pastebin::PastebinCom::Retrieve->new(
ua => LWP::UserAgent->new(
timeout => 10,
agent => 'PasterUA',
),
);
Constructs and returns a brand new juicy
WWW::Pastebin::PastebinCom::Retrieve
object. Takes two arguments, both are I<optional>. Possible arguments are
as follows:
=head3 C<timeout>
->new( timeout => 10 );
B<Optional>. Specifies the C<timeout> argument of L<LWP::UserAgent>'s
constructor, which is used for retrieving. B<Defaults to:> C<30> seconds.
=head3 C<ua>
->new( ua => LWP::UserAgent->new( agent => 'Foos!' ) );
B<Optional>. If the C<timeout> argument is not enough for your needs
of mutilating the L<LWP::UserAgent> object used for retrieving, feel free
to specify the C<ua> argument which takes an L<LWP::UserAgent> object
as a value. B<Note:> the C<timeout> argument to the constructor will
not do anything if you specify the C<ua> argument as well. B<Defaults to:>
plain boring default L<LWP::UserAgent> object with C<timeout> argument
set to whatever C<WWW::Pastebin::PastebinCom::Retrieve>'s C<timeout>
argument is
set to as well as C<agent> argument is set to mimic Firefox.
=head1 METHODS
=head2 C<retrieve>
my $results_ref = $paster->retrieve('http://pastebin.com/f525c4cec')
or die $paster->error;
my $results_ref = $paster->retrieve('f525c4cec')
or die $paster->error;
Instructs the object to retrieve a paste specified in the argument. Takes
one mandatory argument which can be either a full URI to the paste you
want to retrieve or just its ID.
On failure returns either C<undef> or an empty list depending on the context
and the reason for the error will be available via the C<error()> method.
On success returns a hashref with the following keys/values:
$VAR1 = {
'lang' => 'Perl',
'posted_on' => 'Sat 22 Mar 16:07',
'content' => 'blah blah content of the paste',
'name' => 'Zoffix'
};
=head3 content
{ 'content' => 'blah blah content of the paste', }
The C<content> key will contain the actual content of the paste. See also
the C<content()> method which is overloaded for this class.
=head3 lang
{ 'lang' => 'Perl' }
The C<lang> key will contain the (computer) language of the paste
(as specified by the person who pasted it)
=head3 posted_on
{ 'posted_on' => 'Sat 22 Mar 16:07', }
The C<posted_on> key will contain the date/time when the paste was created.
=head3 name
{ 'name' => 'Zoffix' }
The C<name> key will contain the name of the person who created the paste.
=head2 C<error>
$paster->retrieve('http://pastebin.com/f525c4cec')
or die $paster->error;
On failure C<retrieve()> returns either C<undef> or an empty list depending
on the context and the reason for the error will be available via the C<error()>
method. Takes no arguments, returns an error message explaining the failure.
=head2 C<id>
my $paste_id = $paster->id;
Must be called after a successful call to C<retrieve()>. Takes no arguments,
returns the paste ID of the last retrieved paste irrespective of whether
an ID or a URI was given to C<retrieve()>.
=head2 C<uri>
my $paste_uri = $paster->uri;
Must be called after a successful call to C<retrieve()>. Takes no arguments,
returns a L<URI> object with the URI pointing to the last retrieved paste
irrespective of whether an ID or a URI was given to C<retrieve()>.
=head2 C<results>
my $last_results_ref = $paster->results;
Must be called after a successful call to C<retrieve()>. Takes no arguments,
returns the exact same hashref the last call to C<retrieve()> returned.
See the C<retrieve()> method for more information.
=head2 C<content>
my $paste_content = $paster->content;
print "Paste content is:\n$paster\n";
Must be called after a successful call to C<retrieve()>. Takes no arguments,
returns the actual content of the paste. B<Note:> this method is overloaded
for this module for interpolation. Thus you can simply interpolate the
object in a string to get the contents of the paste.
=head2 C<ua>
my $old_LWP_UA_obj = $paster->ua;
$paster->ua( LWP::UserAgent->new( timeout => 10, agent => 'foos' ) );
Returns the currently used L<LWP::UserAgent> object used for retrieving
pastes. Takes one optional argument which must be an L<LWP::UserAgent>
object, and the object you specify will be used in any subsequent calls
to C<retrieve()>.
=head1 SEE ALSO
L<LWP::UserAgent>, L<URI>
=head1 AUTHOR
Zoffix Znet, C<< <zoffix at cpan.org> >>
(L<http://zoffix.com>, L<http://haslayout.net>)
=head1 BUGS
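Please report any bugs or feature requests to
C<bug-www-pastebin-pastebincom-retrieve at rt.cpan.org>, or through
the web interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Pastebin-PastebinCom-Retrieve>.
I will be notified, and then you'll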
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc WWW::Pastebin::PastebinCom::Retrieve
You can also look for information at:
=over 4
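=item * RT: CPAN's request tracker
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Pastebin-PastebinCom-Retrieve>
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/WWW-Pastebin-PastebinCom-Retrieve>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/WWW-Pastebin-PastebinCom-Retrieve>
=item * Search CPAN
L<http://search.cpan.org/dist/WWW-Pastebin-PastebinCom-Retrieve>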
=back
=head1 COPYRIGHT & LICENSE
Copyright 2008 Zoffix Znet, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut