package WWW::Pastebin::PastebinCa::Retrieve;
use warnings;
use strict;
our $VERSION = '0.003';
use base 'WWW::Pastebin::Base::Retrieve';
use HTML::TokeParser::Simple;
use HTML::Entities;
sub _make_uri_and_id {
my ( $self, $id ) = @_;
my ( $private ) = $id =~ m{(?:http://)? (?:www\.)? (.+?) pastebin\.ca};
$private = ''
unless defined $private;
$id =~ s{ ^ \s+ | (?:http://)? (?:www\.)?.*? pastebin\.ca/ | \s+ $}{}gxi;
return ( URI->new("http://${private}pastebin.ca/$id"), $id );
}
sub _parse {
my ( $self, $content ) = @_;
return $self->_set_error( 'Nothing to parse (empty document retrieved)' )
unless defined $content and length $content;
my $parser = HTML::TokeParser::Simple->new( \$content );
my %data;
my %nav = (
level => 0,
get_lang => 0,
get_name => 0,
get_date => 0,
get_desc => 0,
);
while ( my $t = $parser->get_token ) {
if ( $t->is_start_tag('h2')
#and defined $t->get_attr('class')
#and $t->get_attr('class') eq 'first'
) {
$nav{level} = 1;
}
elsif ( $nav{level} == 1 and $t->is_start_tag('dt') ) {
@nav{ qw(level get_name) } = (2, 1);
}
elsif ( $nav{get_name} == 1 and $t->is_text ) {
$data{name} = $t->as_is;
$nav{get_name} = 0;
}
elsif ( $t->is_start_tag('p') and defined $t->get_attr('id')
and $t->get_attr('id') eq 'des'
) {
$nav{get_desc} = 1;
}
elsif ( $nav{get_desc} and $t->is_text ) {
$data{desc} = $t->as_is;
$nav{get_desc} = 0;
}
elsif ( $nav{level} == 2 and $t->is_start_tag('dd') ) {
$nav{get_date} = 1;
$nav{level}++;
}
elsif ( $nav{get_date} and $t->is_text ) {
$data{post_date} = $t->as_is;
$data{post_date} =~ s/\s+/ /g;
$data{post_date} =~ s/ //g;
$nav{get_date} = 0;
$nav{level} = 7;
}
elsif ( $nav{level} == 7 and $t->is_start_tag('span') ) {
$nav{level}++;
}
elsif ( $t->is_start_tag('textarea')
and defined $t->get_attr('name')
and $t->get_attr('name') eq 'content' ) {
$nav{get_paste} = 1;
}
elsif ( $nav{get_paste} and $t->is_text ) {
$data{content} = $t->as_is;
$nav{get_paste} = 0;
$nav{get_lang} = 1;
}
elsif ( $nav{get_lang} == 1 and $t->is_start_tag('select') ) {
$nav{get_lang} = 2;
}
elsif ( $nav{get_lang} == 2 and $t->is_start_tag('option')
and $t->get_attr('selected')
) {
$nav{get_lang} = 3;
}
elsif ( $nav{get_lang} == 3 and $t->is_text ) {
$data{language} = $t->as_is;
$nav{success} = 1;
last;
}
}
unless ( $nav{success} ) {
my $message = "Failed to parse paste.. ";
$message .= $nav{level}
? "\$nav{level} == $nav{level}"
: "that paste ID doesn't seem to exist";
return $self->_set_error( $message );
}
decode_entities( $_ ) for values %data;
$self->content( $data{content} );
return \%data;
}
1;
__END__
=head1 NAME
WWW::Pastebin::PastebinCa::Retrieve - a module to retrieve pastes from http://pastebin.ca/ website
=head1 CHANGES FROM EARLIER VERSIONS
Due to the change on the pastebin, as of version 0.003, the description
of the paste is available, while the "age" is no longer.
=head1 SYNOPSIS
my $paster = WWW::Pastebin::PastebinCa::Retrieve->new;
$paster->retrieve('http://pastebin.ca/951898')
or die $paster->error;
print "Paste content is:\n$paster\n";
=head1 DESCRIPTION
The module provides interface to retrieve pastes from
L website via Perl.
=head1 CONSTRUCTOR
=head2 C
my $paster = WWW::Pastebin::PastebinCa::Retrieve->new;
my $paster = WWW::Pastebin::PastebinCa::Retrieve->new(
timeout => 10,
);
my $paster = WWW::Pastebin::PastebinCa::Retrieve->new(
ua => LWP::UserAgent->new(
timeout => 10,
agent => 'PasterUA',
),
);
Constructs and returns a brand new juicy WWW::Pastebin::PastebinCa::Retrieve
object. Takes two arguments, both are I. Possible arguments are
as follows:
=head3 C
->new( timeout => 10 );
B. Specifies the C argument of L's
constructor, which is used for retrieving. B C<30> seconds.
=head3 C
->new( ua => LWP::UserAgent->new( agent => 'Foos!' ) );
B. If the C argument is not enough for your needs
of mutilating the L object used for retrieving, feel free
to specify the C argument which takes an L object
as a value. B the C argument to the constructor will
not do anything if you specify the C argument as well. B
plain boring default L object with C argument
set to whatever C's C
argument is set to as well as C argument is set to mimic Firefox.
=head1 METHODS
=head2 C
my $results_ref = $paster->retrieve('http://pastebin.ca/951898')
or die $paster->error;
my $results_ref = $paster->retrieve('951898')
or die $paster->error;
Instructs the object to retrieve a paste specified in the argument. Takes
one mandatory argument which can be either a full URI to the paste you
want to retrieve or just its ID.
On failure returns either C or an empty list depending on the context
and the reason for the error will be available via C method.
On success returns a hashref with the following keys/values:
$VAR1 = {
'language' => 'Raw',
'content' => 'blah blah content of the paste',
'post_date' => 'Friday, March 21st, 2008 at 1:05:19pm MDT',
'name' => 'Unnamed',
'desc' => 'Perl stuff'
};
=over 14
=item language
{ 'language' => 'Raw' }
The (computer) language of the paste.
=item content
{ 'content' => 'select t.terr_id, max(t.start_date) as start_dat' }
The content of the paste.
=item post_date
{ 'post_date' => 'Wednesday, March 5th, 2008 at 10:31:42pm MST' }
The date when the paste was created
=item name
{ 'name' => 'Mine' }
Tha name of the poster or the title of the paste.
=item desc
{ 'desc' => 'Perl stuff' }
Contains description of the paste.
=back
=head2 C
$paster->retrieve('951898')
or die $paster->error;
On failure C returns either C or an empty list depending
on the context and the reason for the error will be available via C
method. Takes no arguments, returns an error message explaining the failure.
=head2 C
my $paste_id = $paster->id;
Must be called after a successful call to C. Takes no arguments,
returns a paste ID number of the last retrieved paste irrelevant of whether
an ID or a URI was given to C
=head2 C
my $paste_uri = $paster->uri;
Must be called after a successful call to C. Takes no arguments,
returns a L object with the URI pointing to the last retrieved paste
irrelevant of whether an ID or a URI was given to C
=head2 C
my $last_results_ref = $paster->results;
Must be called after a successful call to C. Takes no arguments,
returns the exact same hashref the last call to C returned.
See C method for more information.
=head2 C
my $paste_content = $paster->content;
print "Paste content is:\n$paster\n";
Must be called after a successful call to C. Takes no arguments,
returns the actual content of the paste. B this method is overloaded
for this module for interpolation. Thus you can simply interpolate the
object in a string to get the contents of the paste.
=head2 C
my $old_LWP_UA_obj = $paster->ua;
$paster->ua( LWP::UserAgent->new( timeout => 10, agent => 'foos' );
Returns a currently used L object used for retrieving
pastes. Takes one optional argument which must be an L
object, and the object you specify will be used in any subsequent calls
to C.
=head1 SEE ALSO
L, L
=head1 AUTHOR
Zoffix Znet, C<< >>
(L, L, L)
=head1 BUGS
Please report any bugs or feature requests to C, or through
the web interface at L. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc WWW::Pastebin::PastebinCa::Retrieve
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker
L
=item * AnnoCPAN: Annotated CPAN documentation
L
=item * CPAN Ratings
L
=item * Search CPAN
L
=back
=head1 COPYRIGHT & LICENSE
Copyright 2008 Zoffix Znet, all rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut