#WWW/monitor.pm. Written in 2007 by Yaron Kahanoitch. This # source code has been placed in the public domain by the author. # Please be kind and preserve the documentation. package WWW::Monitor; use 5.005; use warnings; use strict; use Carp; use WWW::Monitor::Task; use WWW::Mechanize; use HTML::FormatText; use File::HomeDir; our(@ISA, @EXPORT, @EXPORT_OK, $VERSION); $VERSION = 0.24; use base qw(Exporter WWW::Mechanize); @EXPORT = qw (); @EXPORT_OK = qw (); our $DEFAULT_CACHE_SUBDIR=".www-monitor"; =head1 NAME WWW::Monitor - Monitor websites for updates and changes =head1 VERSION Version 0.01 =cut =head1 SYNOPSIS use MIME::Lite; use WWW::Monitor; sub notify { my ($url,$text) =@_; foreach my $recipient ('user1@host','user2@host2') { my $mail_obj = MIME::Lite->new(To=>$recipient, From=>'from@myHost', Subject=>"Web alert web page changed", Type=>'Text', Data=>'For details visit '.$url."\n".$text ); $mail_obj->send; } return 1; } my $mon = WWW::Monitor->new('MAIL_CALLBACK'=>\¬ify); $mon->watch('http:://www.kahanovitch.com/'); $mon->run; Or: use WWW::Monitor; my $mon=WWW::Monitor->new('MAIL_CALLBACK'=>\¬ify,'CACHE'=>$cache); my $task = $mon->watch("$url"); $mon->run or die "Query ended with error"; sun notify { my ($url,$task) =@_; print "$url has changed\n"; while (my ($sub_url,$ref_http_response) = each %{$task->added_parts()}) { print "New part added: $sub_url \n"; } while (my ($sub_url,$ref_http_response) = each %{$task->missing_parts()}) { print "Part deleted: $sub_url \n"; } foreach my $sub_url ( $task->changed_parts()) { print "$sub_url has changed:\n"; my ($old,$new) = $task->get_old_new_pair($sub_url); my $old_content = $old->content; my $new_content = $new->content; } } =head1 Description L ia a Web monitoring mechanism built to detect and notify changes in web pages. The module is designed to compare existing, online versions and pre-cached matched version. A web page may include more than one file. A page may include some frames and visible referenced data, which all together form a sigle visible page. For now, WWW::Monitor compares only textual information. Images, and non-HTML data are not being compared. To store information, WWW::Monitor caches data with the "Cache" mechanism. By default, Cache::File is being used, but the user may choose to use any Cache object that implements the Cache module interface. L is a subclass of L, so any of L or its super classes can be used. =head1 EXPORT =head1 FUNCTIONS =head2 new ( [ OPTIONS ] ) A constructor. OPTIONS are passed in a hash like fashion, using key and value pairs. Possible options are: URL - A target URL to monitor. CACHE_ROOT - A root directory under which all caching is being managed. Default = /.www-monitor CACHE - cache object. The object must have get() and set() methods like the Cache interface, as well as set_validity and validity. =cut sub new { my $this = shift; my $class = ref($this) || $this; my %args; unless (@_ % 2) { %args = @_; } else { carp( "Parameters for WWW::Monitor should be given as pair of 'OPTION'=>'VAL'"); } my $cache_root = delete $args{CACHE_ROOT}; unless ($cache_root) { my $def_dir = File::HomeDir->my_home."/".$DEFAULT_CACHE_SUBDIR; if (!-d $def_dir && !mkdir($def_dir)) { carp("directory $def_dir does not exists and cannot be created.$!"); return 0; } $cache_root = $def_dir; } my $cache = delete $args{CACHE}; if ($cache) { unless ((ref($cache) ne "HASH") && $cache->can("get") && $cache->can("set") && $cache->can("set_validity") && $cache->can("get_validity") && $cache->can("exists")) { carp "The given CACHE object must implements Cache interface and must be initialized"; $cache = ""; } } else { require Cache::File; $cache = Cache::File->new( cache_root => $cache_root); } my $mailcallback = delete $args{MAIL_CALLBACK}; my $self=$class->SUPER::new(%args); $self->{tasks} = []; $self->{cache_root} = $cache_root; $cache = ($cache)?$cache:Cache::File->new( cache_root => $self->{cache_root}); $self->{cache} = $cache; $self->{errors_hash} = {}; $self->{mailcallback} = $mailcallback if ($mailcallback); return $self; } =head2 watch ( URL(S) ) Add URL to be watched. watch returns a reference to a L object. for example $obj->watch('http://www.cnn.com' ) =cut sub watch { my $self = shift; my $target = shift; my $task = WWW::Monitor::Task->new('URL',$target); push @{$self->{tasks}},$task; return $task; } =head2 notify_callback ( sub ) A code reference to be executed whenever a change is detected (commonly used for sending mail). The following parameters will be passed to the code reference: $url -> a string that holds the url for which a change was detected. $text -> A Message to be sent. $task -> WWW::Monitor::Task object reference. The given code reference should return true for success. =cut sub notify_callback { my $self = shift; $self->{mailcallback} = shift; return 1; } =head2 run Watch all given web pages and report changes if detected. If a url is first visited (i.e. the url is not in the cache db) than the url will be cached and no report will be created. =cut sub run { my $self = shift; my $carrier = $self; my $cache = $self->{cache}; my $ret = 1; $self->{errors_hash} = {}; foreach my $task (@{$self->{tasks}}) { $task->run($self,$carrier,$cache) or $ret = 0; } return $ret; } =head2 errors_table Return a hash reference of errors updated to last execution (i.e. when the run method was last executed). The returned keys are the urls where the values are error descriptions. =cut sub errors_table { my $self = shift; my $ret_hash = {}; foreach my $task (@{$self->{tasks}}) { $ret_hash->{$task->{url}} = $task->{error} unless $task->success(); } while (my($url,$error) = each %{$self->{errors_hash}}) { $ret_hash->{$url} = $error; } return $ret_hash; } =head2 errors return a string that contains all errors. In array context return a list of errors. =cut sub errors { my $self=shift; my $all_errors_hash = $self->errors_table; my @list_of_errors; while (my($url,$error) = each %$all_errors_hash) { push @list_of_errors,$url.":".$error; } return @list_of_errors if (wantarray); return join("\n",@list_of_errors); } =head2 notify (Private Method) Activate notification callback =cut sub notify { my $self = shift; my ($url,$task) = @_; if (exists $self->{mailcallback} and $self->{mailcallback}) { return &{$self->{mailcallback}}($url,$task); } return 1; } =head2 targets Return a list of strings out of watched targets. =cut sub targets { my $self = shift; my @res = (); foreach my $task (@{$self->{tasks}}) { push @res,$task->{url}; } return @res; } =head1 AUTHOR Yaron Kahanovitch, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc WWW::Monitor =over =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 ACKNOWLEDGMENTS =head1 COPYRIGHT & LICENSE Copyright 2007 Yaron Kahanovitch, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of WWW::Monitor