package WWW::Yahoo::Movies;
use strict;
use warnings;
use vars qw($VERSION $AUTOLOAD %FIELDS);
use fields qw(
id
title
cover
year
mpaa_rating
distributor
release_date
runtime
genres
plot_summary
people
matched
error
error_msg
_proxy
_timeout
_user_agent
_page
_parser
_search
_server_url
_search_uri
_movie_uri
);
BEGIN {
$VERSION = '0.03';
}
use LWP::Simple qw(get $ua);
use HTML::TokeParser;
use Carp;
use Data::Dumper;
{
my $_class_def = {
error => 0,
error_msg => '',
mpaa_rating => [],
_timeout => 10,
_user_agent => 'Mozilla/5.0',
_server_url => 'http://movies.yahoo.com',
_movie_uri => '/shop?d=hv&cf=info&id=',
_search_uri => '/mv/search?type=feature&p=',
};
sub _class_def { $_class_def }
sub _get_default_val {
my $self = shift;
my $attr = shift;
return $_class_def->{$attr};
}
}
sub new {
my $class = shift;
my $self = {};
bless $self, $class;
$self->_init(@_);
return $self;
}
sub _init {
my $self = shift;
my %params = @_;
for my $prop(keys %FIELDS) {
my $attr = $prop;
$attr =~ s/^_//;
$self->{$prop} = exists $params{$attr} ? $params{$attr}
: $self->_get_default_val($prop);
}
if($self->proxy) { $ua->proxy(['http'], $self->proxy) }
else { $ua->env_proxy }
$ua->agent($self->user_agent);
$ua->timeout($self->timeout);
$self->_get_page();
return if $self->error;
$self->parse_page();
}
sub _get_page {
my $self = shift;
croak "Wrong paramter!" if $self->id !~ /\d+/ && $self->_search;
my $url = $self->_server_url.($self->id =~ /\d+/ ? $self->_movie_uri : $self->_search_uri).$self->id;
$self->{_page} = get($url) || die "Cannot connect to the Yahoo: $!!";
unless($self->id =~ /\d+/) {
$self->_process_page();
$self->_search(1);
}
}
sub _process_page {
my $self = shift;
if($self->_page =~ /no\s+matches\s+were\s+found/i) {
$self->error_msg("Nothing found!");
$self->error(1);
return;
}
my $parser = $self->_parser;
my($tag, $text);
while($tag = $parser->get_tag('b')) {
$text = $parser->get_text();
last if $text =~ /top\s+matching\s+movie\s+titles/i;
}
$parser->get_tag('table');
while($tag = $parser->get_tag) {
if($tag->[0] eq 'a' && $tag->[1]{href} =~ m#/(\d+)/info#) {
$text = $parser->get_trimmed_text('a', 'br');
my $id = $1;
$self->matched($id, $text);
}
last if $tag->[0] eq '/table';
}
if($self->matched) {
$self->id($self->matched->[0]{id});
$self->_get_page();
} else {
$self->error_msg("Nothing matched!");
$self->error(1);
return;
}
}
sub matched {
my $self = shift;
if(@_) {
my($id, $title) = @_;
push @{ $self->{matched} }, {id => $id, title => $title};
}
return $self->{matched};
}
sub proxy {
my $self = shift;
if(@_) { $self->{_proxy} = shift }
return $self->{_proxy};
}
sub timeout {
my $self = shift;
if(@_) { $self->{_timeout} = shift }
return $self->{_timeout}
}
sub user_agent {
my $self = shift;
if(@_) { $self->{_user_agent} = shift }
return $self->{_user_agent}
}
sub parse_page {
my $self = shift;
$self->_parse_title();
$self->_parse_details();
$self->_parse_cover();
$self->_parse_trailer();
$self->_parse_plot();
}
sub cover_file {
my $self = shift;
if($self->cover) {
my($file_name) = $self->cover =~ /(?:.+)\/(.+)$/;
return $file_name;
}
}
sub mpaa_rating {
my $self = shift;
if($_[0] && ref($_[0]) eq 'ARRAY') { $self->{mpaa_rating} = shift }
return wantarray ? @{ $self->{mpaa_rating} } : $self->{mpaa_rating}[0];
}
sub directors {
my $self = shift;
return $self->{'people'}->{'directors'} if $self->{'people'};
}
sub producers {
my $self = shift;
return $self->{'people'}->{'producers'} if $self->{'people'};
}
sub cast {
my $self = shift;
return $self->{'people'}->{'cast'} if $self->{'people'};
}
sub _parser {
my $self = shift;
$self->{_parser} = new HTML::TokeParser(\$self->_page());
return $self->{_parser};
}
sub _parse_title {
my $self = shift;
($self->{title}, $self->{year}) =
$self->_page =~ m#
(.+)\s+\((\d+)\)
#mi;
}
sub _parse_details {
my $self = shift;
my $p = $self->_parser();
while($p->get_tag('b')) {
my $t;
my $caption = $p->get_text;
SWITCH: for($caption) {
/^Genres/ && do {
$t = $p->get_trimmed_text('/tr');
$self->genres([split m#/#, $t]);
last SWITCH; };
/^Running Time/ && do {
$t = $p->get_trimmed_text('/tr');
$self->runtime($self->_parse_runtime($t));
last SWITCH; };
/^Release Date/ && do {
$t = $p->get_trimmed_text('b');
my($mon, $day, $year) = $t =~ /(.+?)\s+(\d+)th,\s+(\d+)\s?[.(]/;
my $date = "$day $mon $year";
$self->release_date($date);
last SWITCH; };
/^MPAA Rating/ && do {
$t = $p->get_trimmed_text('/tr');
my($code, $descr) = $t =~ /(.+?)\s+(.+)/;
$self->mpaa_rating([$code, $descr]);
last SWITCH; };
/^Distributor/ && do {
$t = $p->get_trimmed_text('/tr');
my($distr) = $t =~ /(.*)\./;
$self->distributor($distr);
last SWITCH; };
/^Cast and Credits$/ && do {
$self->_parse_people($p);
last SWITCH; };
};
}
}
sub _parse_cover {
my $self = shift;
my $p = $self->_parser();
while(my $tag = $p->get_tag('img')) {
if($tag->[1]{alt} && $tag->[1]{alt} =~ /^$self->{title}/i) {
$self->{cover} = $tag->[1]{src};
last;
}
}
}
sub _parse_trailer {
my $self = shift;
my $p = $self->_parser();
while(my $tag = $p->get_tag('a')) {
if($tag->[1]{href} =~ /videoWin/i) {
$self->{trailer} = $tag->[1]{href};
last;
}
}
}
sub _parse_plot {
my $self = shift;
my $p = $self->_parser();
while(my $tag = $p->get_token()) {
if($tag->[0] eq 'C') {
last if $tag->[1] =~ /another vertical spacer/;
}
}
$p->get_tag('font');
$self->{plot_summary} = $p->get_trimmed_text('font', 'table');
}
sub _parse_runtime {
my($self, $time_str) = @_;
my $time = '';
if($time_str) {
my($hours, $min) =
$time_str =~ m#(\d{0,2})(?:\s+hr\w?\.?)(?:\s+?)(\d{1,2})\s+min\.?#;
$time = $hours*60 + $min;
}
return $time;
}
sub _parse_people {
my($self, $p) = @_;
my $key;
while(my $tag = $p->get_token) {
if($tag->[1] eq 'font') {
my $text = $p->get_text();
if($text eq 'Starring:') { $key = 'cast' }
elsif($text eq 'Directed by:') { $key = 'directors' }
elsif($text eq 'Produced by:') { $key = 'producers' }
}
if($tag->[0] eq 'S' && $tag->[1] eq 'a') {
if($tag->[2]{href} =~ /shop\?d\=hc\&id\=(\d+)\&cf\=gen/ && $key) {
push @{ $self->{'people'}->{$key} }, [$1, $p->get_text];
}
}
}
}
sub AUTOLOAD {
my $self = shift;
my($class, $attr) = $AUTOLOAD =~ /(.*)::(.*)/;
my($pack, $file, $line) = caller;
if(exists $FIELDS{$attr}) {
$self->{$attr} = shift() if @_;
return $self->{$attr};
} else {
carp "Method [$attr] not found in the class [$class]!\n Called from $pack at line $line";
}
}
sub DESTROY {
my $self = shift;
}
1;
__END__
=head1 NAME
WWW::Yahoo::Movies - Perl extension to get Yahoo! Movies
information.
=head1 SYNOPSIS
use WWW::Yahoo::Movies;
my $movie = new WWW::Yahoo::Movies();
print "TITLE: ".$movie->title." - ".$movie->year."\n";
=head1 DESCRIPTION
WWW::Yahoo::Movies is Perl interface to the Yahoo! Movies
(http://movies.yahoo.com/). Sometimes IMDB doesn't have full information
about movie (plot summary, cover etc). In that case it's good idea
to have another place to get movie info.
Also, there are many Perl extensions for Yahoo! in the CPAN. Hope
WWW::Yahoo::Movies will be useful as well!
=head2 CONSTRUCTOR
=over 4
=item new()
You should pass movie title or Yahoo! movie ID. In case movie ID it'll
retrieve the movie information directly. If movie title was passed as
constructor parameter it'll make a search, store matched results and
return the first matched movie info:
my $movie = new WWW::Yahoo::Movies(id => 1808444810);
or
my $movie = new WWW::Yahoo::Movies(id => 'Troy');
=back
=head2 PUBLIC OBJECT METHODS
=over 4
=item id()
Yahoo! movie ID:
my $id = $ym->id();
=item title()
Yahoo! movie title:
my $title $ym->title();
=item cover()
A link on Yahoo! movie cover:
use LWP::Simple qw(get);
my $cover_img = get($ym->cover);
print "Content-type: image/jpeg\n\n";
print $cover_img;
=item year()
Year of release of Yahoo! movie:
my $year = $ym->year();
=item mpaa_rating()
MPAA rating of Yahoo! movie. In scalar context it returns MPAA code, in array context
it returns array contained MPAA code and description.
my $mpaa_code = $ym->mpaa_rating();
or
my($mpaa_code, $mpaa_descr) = $ym->mpaa_rating();
For more information about MPAA rating please visit that page
http://www.mpaa.org/movieratings/
=item distributor()
Company name which distributes Yahoo! movie:
my $distr_name = $ym->distributor();
=item release_date()
Release date of Yahoo! movie:
my $date = $ym->release_date();
=item runtime()
Returns a duration of Yahoo! movie in minutes:
my $runtime = $ym->runtime();
=item genres()
Genres of Yahoo! movie:
my @genres = @{ $self->genres };
Note: that method returns a reference on array with genres.
=item plot_summary()
A short description of Yahoo! movie:
my $plot = $ym->plot_summary();
=item matched()
List of mathed Yahoo! movies in case of search by movie's title. It returns
an array reference with hashes in the form of id => title:
map { print "ID: $_->{id}; title: $_->{title}\n" } @{ $ym->matched();
=item people()
Return a hash with following keys - director, producer and cast, which correspond on array
with Yahoo Person ID and person full name:
my $people = $ymovie->people();
for(keys %$people) {
print "Category $_ \n";
for(@{$person->{$_}}) {
print "$_->[0]: $_->[2] ...\n";
}
}
=item cast()
Return a list of movie cast like pair: Yahoo Person ID, person name:
my $cast = $ymovie->cast;
for(@$cast) {
print "$_->[0]: $_->[1]\n";
}
=item producers()
Return a list of movie producers like pair: Yahoo Person ID, person name:
my $producers = $ymovie->producers;
for(@$producer) {
print "$_->[0]: $_->[1]\n";
}
=item directors()
Return a list of movie directors like pair: Yahoo Person ID, person name:
my $directors = $ymovie->directors;
for(@$directors) {
print "$_->[0]: $_->[1]\n";
}
=back
=head2 ERROR METHODS
=over 4
=item error()
Indicates if some error happened during retrieving of movie information:
if($ym->error) {
print "[ERROR] [".$ym->error."] ".ym->error_msg."!\n";
exit(0);
}
=item error_msg()
Contains an error description:
print "ERROR: ".$ym->error_msg."!\n";
=back
=head1 EXAMPLE
#!/usr/bin/perl -w
use strict;
use warnings;
use WWW::Yahoo::Movies;
my $title = shift || 'troy';
my $matched = get_movie_info($title, 1);
for(@$matched) {
print "\nGet [$_->{title}] ...\n";
get_movie_info($_->{id});
}
sub get_movie_info {
my $title = shift;
my $ret_match = shift || 0;
my $ym = new WWW::Yahoo::Movies(id => $title);
print "Get info about [$title] ...";
print "\n\tID: ".$ym->id;
print "\n\tTITLE: ".$ym->title;
print "\n\tYEAR: ".$ym->year;
print "\n\tMPAA: ".$ym->mpaa_rating;
print "\n\tCOVER: ".$ym->cover_file;
print "\n\tPLOT: ".substr($ym->plot_summary, 0, 90)." ...";
print "\n\tDATE: ".$ym->release_date;
print "\n\tDISTR: ".$ym->distributor;
print "\n\tGENRES: ".join(", ", @{ $ym->genres }) if $ym->genres;
return $ym->matched if $ret_match;
}
=head1 EXPORT
None by default.
=head1 SEE ALSO
IMDB::Film
=head1 AUTHOR
Michael Stepanov, Estepanov.michael@gmail.com
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2005 by Michael Stepanov
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.
=cut