package WWW::Pastebin::PhpfiCom::Retrieve;
use warnings;
use strict;
our $VERSION = '0.001';
use URI;
use HTML::TokeParser::Simple;
use HTML::Entities;
use base 'WWW::Pastebin::Base::Retrieve';
sub _make_uri_and_id {
my ( $self, $id ) = @_;
$id=~ s{ ^\s+ | (?:http://)? (?:www\.)? phpfi\.com/(?=\d+) | \s+$ }{}xgi;
return ( URI->new("http://phpfi.com/$id"), $id );
}
sub _get_was_successful {
my ( $self, $content ) = @_;
my $results_ref = $self->_parse( $content );
return
unless defined $results_ref;
my $content_uri = $self->uri->clone;
$content_uri->query_form( download => 1 );
my $content_response = $self->ua->get( $content_uri );
if ( $content_response->is_success ) {
$results_ref->{content} =
$self->content($content_response->content);
return $self->results( $results_ref );
}
else {
return $self->_set_error(
'Network error: ' . $content_response->status_line
);
}
}
sub _parse {
my ( $self, $content ) = @_;
my $parser = HTML::TokeParser::Simple->new( \$content );
my %data;
my %nav;
@nav{ qw(get_info level get_lang is_success get_content check_404) }
= (0) x 6;
$nav{content} = '';
while ( my $t = $parser->get_token ) {
if ( $t->is_start_tag('td') ) {
$nav{get_info}++;
$nav{check_404}++;
$nav{level} = 1;
}
elsif ( $nav{check_404} == 1 and $t->is_end_tag('td') ) {
$nav{check_404} = 2;
$nav{level} = 10;
}
elsif ( $nav{check_404} and $t->is_start_tag('b') ) {
return $self->_set_error('This paste does not seem to exist');
}
elsif ( $nav{get_info} == 1 and $t->is_text ) {
my $text = $t->as_is;
$text =~ s/ / /g;
@data{ qw(age name hits) } = $text
=~ /
created \s+
( .+? (?:\s+ago)? ) # stupid timestaps
(?: \s+ by \s+ (.+?) )? # name might be missing
,\s+ (\d+) \s+ hits?
/xi;
$data{name} = 'N/A'
unless defined $data{name};
@nav{ qw(get_info level) } = (2, 2);
}
elsif ( $t->is_start_tag('select')
and defined $t->get_attr('name')
and $t->get_attr('name') eq 'lang'
) {
$nav{get_lang}++;
$nav{level} = 3;
}
elsif ( $t->is_start_tag('div')
and defined $t->get_attr('id')
and $t->get_attr('id') eq 'content'
) {
@nav{ qw(get_content level) } = (1, 4);
}
elsif ( $nav{get_content} and $t->is_end_tag('div') ) {
@nav{ qw(get_content level) } = (0, 5);
}
elsif ( $nav{get_content} and $t->is_text ) {
$nav{content} .= $t->as_is;
$nav{level} = 6;
}
elsif ( $nav{get_lang} == 1
and $t->is_start_tag('option')
and defined $t->get_attr('selected')
and defined $t->get_attr('value')
) {
$data{lang} = $t->get_attr('value');
$nav{is_success} = 1;
last;
}
}
return $self->_set_error('This paste does not seem to exist')
if $nav{content} =~ /entry \d+ not found/i;
return $self->_set_error("Parser error! Level == $nav{level}")
unless $nav{is_success};
$data{ $_ } = decode_entities( delete $data{ $_ } )
for grep { $_ ne 'content' } keys %data;
# content() is set in retrieve()
return \%data;
}
1;
__END__
=head1 NAME
WWW::Pastebin::PhpfiCom::Retrieve - retrieve pastes from http://phpfi.com/ website
=head1 SYNOPSIS
use strict;
use warnings;
use WWW::Pastebin::PhpfiCom::Retrieve;
my $paster = WWW::Pastebin::PhpfiCom::Retrieve->new;
my $results_ref = $paster->retrieve('http://phpfi.com/302683')
or die $paster->error;
printf "Paste %s was posted %s by %s, it is written in %s "
. "and was viewed %s time(s)\n%s\n",
$paster->uri, @$results_ref{ qw(age name lang hits content) };
=head1 DESCRIPTION
The module provides interface to retrieve pastes from L
website via Perl.
=head1 CONSTRUCTOR
=head2 C
my $paster = WWW::Pastebin::PhpfiCom::Retrieve->new;
my $paster = WWW::Pastebin::PhpfiCom::Retrieve->new(
timeout => 10,
);
my $paster = WWW::Pastebin::PhpfiCom::Retrieve->new(
ua => LWP::UserAgent->new(
timeout => 10,
agent => 'PasterUA',
),
);
Constructs and returns a brand new juicy WWW::Pastebin::PhpfiCom::Retrieve
object. Takes two arguments, both are I. Possible arguments are
as follows:
=head3 C
->new( timeout => 10 );
B. Specifies the C argument of L's
constructor, which is used for retrieving. B C<30> seconds.
=head3 C
->new( ua => LWP::UserAgent->new( agent => 'Foos!' ) );
B. If the C argument is not enough for your needs
of mutilating the L object used for retrieving, feel free
to specify the C argument which takes an L object
as a value. B the C argument to the constructor will
not do anything if you specify the C argument as well. B
plain boring default L object with C argument
set to whatever C's C argument
is set to as well as C argument is set to mimic Firefox.
=head1 METHODS
=head2 C
my $results_ref = $paster->retrieve('http://phpfi.com/302683')
or die $paster->error;
my $results_ref = $paster->retrieve('302683')
or die $paster->error;
Instructs the object to retrieve a paste specified in the argument. Takes
one mandatory argument which can be either a full URI to the paste you
want to retrieve or just its ID.
On failure returns either C or an empty list depending on the context
and the reason for the error will be available via C method.
On success returns a hashref with the following keys/values:
$VAR1 = {
'hits' => '0',
'lang' => 'perl',
'content' => '{ test => \'yes\' }',
'name' => 'Zoffix',
'age' => '7 hours and 41 minutes'
};
=head3 content
{ 'content' => '{ test => \'yes\' }' }
The C kew will contain the actual content of the paste.
=head3 lang
{ 'lang' => 'perl' }
The C key will contain the (computer) language of the paste
(as was specified by the poster).
=head3 name
{ 'name' => 'Zoffix' }
The C key will contain the name of the poster who created the paste.
=head3 hits
{ 'hits' => '0' }
The C key will contain the number of times the paste was viewed.
=head3 age
{ 'age' => '7 hours and 41 minutes ago' }
The C key will contain the "age" of the paste, i.e. how long ago
it was created. B if the paste is old enough the C will contain
the date/time of the post instead of "foo bar ago".
=head2 C
$paster->retrieve('http://phpfi.com/302683')
or die $paster->error;
On failure C returns either C or an empty list depending
on the context and the reason for the error will be available via C
method. Takes no arguments, returns an error message explaining the failure.
=head2 C
my $paste_id = $paster->id;
Must be called after a successful call to C. Takes no arguments,
returns a paste ID number of the last retrieved paste irrelevant of whether
an ID or a URI was given to C
=head2 C
my $paste_uri = $paster->uri;
Must be called after a successful call to C. Takes no arguments,
returns a L object with the URI pointing to the last retrieved paste
irrelevant of whether an ID or a URI was given to C
=head2 C
my $last_results_ref = $paster->results;
Must be called after a successful call to C. Takes no arguments,
returns the exact same hashref the last call to C returned.
See C method for more information.
=head2 C
my $paste_content = $paster->content;
print "Paste content is:\n$paster\n";
Must be called after a successful call to C. Takes no arguments,
returns the actual content of the paste. B this method is overloaded
for this module for interpolation. Thus you can simply interpolate the
object in a string to get the contents of the paste.
=head2 C
my $old_LWP_UA_obj = $paster->ua;
$paster->ua( LWP::UserAgent->new( timeout => 10, agent => 'foos' );
Returns a currently used L object used for retrieving
pastes. Takes one optional argument which must be an L
object, and the object you specify will be used in any subsequent calls
to C.
=head1 SEE ALSO
L, L
=head1 AUTHOR
Zoffix Znet, C<< >>
(L, L)
=head1 BUGS
Please report any bugs or feature requests to C, or through
the web interface at L. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc WWW::Pastebin::PhpfiCom::Retrieve
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker
L
=item * AnnoCPAN: Annotated CPAN documentation
L
=item * CPAN Ratings
L
=item * Search CPAN
L
=back
=head1 COPYRIGHT & LICENSE
Copyright 2008 Zoffix Znet, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut