require 5;
# This file contains embedded documentation in POD format.
# Use 'perldoc' to read it.
package XML::RSS::Timing;
use strict;
use Carp ();
use vars qw($VERSION);
use Time::Local ();
$VERSION = '1.07';
# Compile-time debug switch: unless a DEBUG sub already exists in this
# package, install one that is a constant false value, so that the
# "DEBUG and print ..." statements in this file are no-ops.
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG; } # set DEBUG level

# Lengths of the supported update periods, in seconds.
use constant HOUR_SEC => 60 * 60;
use constant DAY_SEC => 60 * 60 * 24;
use constant WEEK_SEC => 60 * 60 * 24 * 7;
# Fixed approximations, not calendar-aware: a "month" is 28 days and a
# "year" is 365 days.
use constant MONTH_SEC => 60 * 60 * 24 * 28;
use constant YEAR_SEC => 60 * 60 * 24 * 365;
# A count of hours (168), not a duration in seconds.
use constant HOURS_IN_WEEK => 24 * 7;

# Day names in index order: 0 = Sunday .. 6 = Saturday.
my @day_names = (
"Sunday", "Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday",
);
# Reverse lookup, day name => day number (0..6), filled with a hash slice.
my %day_name2number;
@day_name2number{@day_names} = (0..6);
# and going the other way, just look at $day_names[ daynumber ]
###########################################################################
=head1 NAME
XML::RSS::Timing - understanding RSS skipHours, skipDays, sy:update*
=head1 SYNOPSIS
...after getting an RSS/RDF feed that contains the following:
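  <sy:updateFrequency>3</sy:updateFrequency>
  <sy:updatePeriod>hourly</sy:updatePeriod>
  <sy:updateBase>1970-01-01T08:20+00:00</sy:updateBase>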
use XML::RSS::Timing;
my $timing = XML::RSS::Timing->new;
$timing->lastPolled( time() );
$timing->updatePeriod( 'hourly' );
$timing->updateFrequency( 3 );
$timing->updateBase( '1970-01-01T08:20+00:00' );
# Find out the soonest I can expect new content:
my $then = $timing->nextUpdate;
print "I can next poll the feed after $then (",
scalar(localtime($then)), " local time)\n";
Polling it before C<$then> is unlikely to return any new content, according
to the C<sy:update*> elements' values.
=head1 DESCRIPTION
RSS/RDF modules can use the elements C<ttl>, C<skipHours>, C<skipDays>,
C<sy:updateBase>, C<sy:updateFrequency>, and C<sy:updatePeriod>
to express what days/times they won't update, so
that RSS/RDF clients can conserve network resources by not bothering to
poll a feed more than once during such a period.
This Perl module is for taking in the RSS/RDF C<ttl>, C<skipHours>,
C<skipDays>, and C<sy:update*> elements' values, and figuring out when they
say new content might be available.
Note:
This module doesn't depend on XML::RSS, nor in fact have any
particular relationship with it.
=head1 OVERVIEW
There are two perspectives on this problem:
=over
=item The "When To Ignore Until?" Perspective
With this perspective, you have just polled the given RSS/RDF feed
(regardless of whether its content turns out to be new), and you want to
see if the feed says you can skip polling it until some other future
time. With this perspective, you extract the C<sy:update*> fields'
values and/or the C<ttl>, C<skipHours>, and C<skipDays> values and pass
them to a new XML::RSS::Timing object, and then ask when you should
avoid polling this until. And in the end you'll probably do this:
my $wait_until = $timing->nextUpdate;
$wait_until = time() + $Default_Polling_Delay
# where $Default_Polling_Delay is some reader-defined value
if $wait_until <= time();
...and then file away C<$wait_until>'s value in some internal table
that is consulted before polling things, like so:
foreach my $feed (@FeedObjects) {
next if $feed->wait_until > time();
# Don't poll it, there'll be nothing new
...Else go ahead and poll it, there could be something new...
}
=item The "Is It Time Yet?" Perspective
With this perspective, you polled the RSS feed at some time in the past,
and are now considering whether its C<sy:update*> fields' values and/or
the C<skipHours> and C<skipDays> values (which you stored somewhere) say
you can I<now> poll the feed (or whether there'd be no point, if
those fields say you shouldn't expect any new content). With
this perspective, you use code like this:
...after calling ->skipHours and/or ->updatePeriod, etc
$timing->lastPolled( $when_last_polled );
if( time() < $timing->nextUpdate ) {
# ...Don't poll it, there'll be nothing new...
} else {
... go ahead and poll it, there could be something new...
}
Of the two perspectives, this second one seems less efficient to me,
but your mileage may vary.
=back
=head1 METHODS
This class defines the following methods:
=over
=cut
###########################################################################
=item C<< $timing = XML::RSS::Timing->new(); >>
This constructor method creates a new object to be used on figuring feed
timing. You should use a new object for each feed you're considering.
=cut
sub new { # Plain constructor; usable as a class method or an object method
  my $invocant = $_[0];
  # Called on an existing object, reuse that object's class.
  my $class = ref($invocant) || $invocant;
  my $object = bless { }, $class;
  $object->init();   # install the per-object defaults
  return $object;
}
#--------------------------------------------------------------------------
sub init {
  # Give a freshly blessed object its starting settings: exceptions
  # switched on, and the update base set to the start of the Unix epoch.
  # Always returns nothing.
  my($self) = @_;
  $self->use_exceptions(1);
  $self->updateBase('1970-01-01T00:00+00:00');
  return;
}
###########################################################################
=item C<< $timing->skipHours( I<hournumbers...> ) >>
This adds to this C<$timing> object the given list of hours from
the given feed's C<skipHours> element. Hours are expressed as
integers from 0 to 23 inclusive.
=cut
sub skipHours {
  my( $self, @hours ) = @_;

  # Called with only the object: act as a read list-accessor.
  if( @_ == 1 ) {
    my $stored = $self->{'skipHours'} || [];
    return @$stored;
  }

  # Check every value before storing any of them, so one bad value
  # means that nothing from this call gets added.
  foreach my $hour (@hours) {
    my $acceptable = ( defined($hour) && length($hour)
      && $hour =~ m/^\d\d?$/s
      && $hour >= 0 && $hour <= 23 ); # Don't use 24 for midnight. use 0.
    next if $acceptable;
    return $self->boom("Usage: \$timingobj->skipHours( hournumbers... )" );
  }

  push @{ $self->{'skipHours'} }, @hours;
  return;
}
#--------------------------------------------------------------------------
=item C<< $timing->skipDays( I<daynames...> ) >>
This adds to this C<$timing> object the given list of days from
the given feed's C<skipDays> element. The day name strings have
to be from the set:
"Sunday", "Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday".
=cut
sub skipDays {
  my( $self, @daynames ) = @_;

  # Called with only the object: act as a read list-accessor.
  if( @_ == 1 ) {
    my $stored = $self->{'skipDays'} || [];
    return @$stored;
  }

  # Check the whole batch first; the push below only happens if every
  # name passed.
  foreach my $d (@daynames) {
    unless( defined($d) && length($d) ) {
      return $self->boom("Usage: \$timingobj->skipDays( daynames... )" );
    }
    next if exists $day_name2number{$d}; # a known day name, spelled exactly
    return $self->boom("Usage: \$timingobj->skipDays( daynames... ) -- \"$d\" isn't a day name" );
  }

  push @{ $self->{'skipDays'} }, @daynames;
  return;
}
#--------------------------------------------------------------------------
sub skipHours_clear {
  # Forget every skipHours value stored on this object.  Returns nothing.
  my $self = $_[0];
  delete $self->{'skipHours'};
  return;
}
sub skipDays_clear {
  # Forget every skipDays value stored on this object.  Returns nothing.
  my $self = $_[0];
  delete $self->{'skipDays'};
  return;
}
#==========================================================================
=item C<< $timing->updateFrequency( I<integer> ) >>
This sets the given C<$timing> object's
updateFrequency value from the feed's (optional) C<sy:updateFrequency>
element. This has to be a positive integer of at most five digits
(a value of 0 is stored as 1).
=cut
sub updateFrequency {
  my $self = $_[0];
  return $self->{'updateFrequency'} if @_ == 1; # as a read accessor

  # Write mode: exactly one argument, made of one to five digits
  # (sanity limit: 1-99999).
  my $freq = $_[1];
  my $acceptable = ( @_ == 2 && defined($freq) && $freq =~ m/^\d{1,5}$/s );
  return $self->boom( "Usage: \$timingobj->updateFrequency( integer )" )
   unless $acceptable;

  my $count = $freq + 0;     # numerify the string
  $count = 1 unless $count;  # a frequency of zero is stored as 1
  $self->{'updateFrequency'} = $count;
  return $count;
}
#==========================================================================
=item C<< $timing->updateBase( I<iso_date_string> ) >>
This sets the given C<$timing> object's
updateBase value from the feed's (optional) C<sy:updateBase>
element. This has to be a date in one of these formats:
1997
1997-07
1997-07-16
1997-07-16T19:20
1997-07-16T19:20Z
1997-07-16T19:20+01:00
1997-07-16T19:20:30+01:00
1997-07-16T19:20:30.45+01:00
The default value is "1970-01-01T00:00Z".
=cut
sub updateBase {
  my($self, $base) = @_;
  if( @_ == 1 ) { return $self->{'updateBase'} } # as a read accessor

  # Write mode needs exactly one non-empty date string.
  unless( @_ == 2 and defined($base) and length($base) ) {
    return $self->boom("Usage: \$timingobj->updateBase( 'yyyy-mm-ddThh:mm' )");
  }

  # An undef result from the converter means the string was rejected.
  my $date = $self->_iso_date_to_epoch($base);
  defined $date
   or return $self->boom("\"$base\" isn't a valid time format.");

  # Keep both the converted value and the string exactly as given.
  @$self{ 'updateBase_sec', 'updateBase' } = ( $date, $base );
  DEBUG and print "Setting updateBase to $base and updateBase_sec to $date\n";
  return $base;
}
#==========================================================================
=item C<< $timing->updatePeriod( I<interval_string> ) >>
This sets the given C<$timing> object's
updatePeriod value from the feed's (optional) C<sy:updatePeriod>
element. This has to be a string from the set:
"hourly", "daily", "weekly", "monthly", "yearly".
=cut
sub updatePeriod {
  # Read/write accessor for the feed's update period.
  #
  # With no argument: returns the currently stored period string.
  # With one argument: it must be one of "hourly", "daily", "weekly",
  # "monthly" or "yearly".  The string is stored under 'updatePeriod',
  # its length in seconds under 'updatePeriod_sec', and the string is
  # returned.  "monthly" and "yearly" are accepted but reported through
  # _complain().  Any other call is handed to boom(), and nothing is
  # stored.
  my($self, $period) = @_;
  return $self->{'updatePeriod'} if @_ == 1; # as a read accessor
  return $self->boom("Usage: \$timingobj->updatePeriod( interval_string )")
   unless @_ == 2 and defined($period) and length($period);

  my $sec;
  if(    $period eq 'hourly' ) { $sec = HOUR_SEC }
  elsif( $period eq 'daily'  ) { $sec = DAY_SEC  }
  elsif( $period eq 'weekly' ) { $sec = WEEK_SEC }
  elsif( $period eq 'yearly' ) { $sec = YEAR_SEC;
    $self->_complain("updatePeriod of 'yearly' is somewhat ill-advised");
  }
  elsif( $period eq 'monthly') { $sec = MONTH_SEC;
    $self->_complain("updatePeriod of 'monthly' is ill-advised");
  }
  else {
    # The "return" matters: without it, a boom() that returns instead
    # of dying would let the code below store the rejected string along
    # with an undefined updatePeriod_sec.
    # NOTE(review): boom() is defined outside this block; init's call to
    # use_exceptions(1) suggests dying is switchable -- confirm.
    return $self->boom("updatePeriod value \"$period\" is invalid.\n"
     . "Use (hourly|daily|weekly|monthly|yearly)" );
  }

  DEBUG and print "Setting update period to $sec ($period)\n";
  $self->{'updatePeriod_sec'} = $sec;
  return $self->{'updatePeriod'} = $period;
}
#--------------------------------------------------------------------------
=item C<< $timing->lastPolled( I ) >>
This sets the time when you last polled this feed. If you don't set
this, the current time (C