# $File: //member/autrijus/XML-RSS-Aggregate/lib/XML/RSS/Aggregate.pm $ $Author: autrijus $ # $Revision: #4 $ $Change: 2924 $ $DateTime: 2002/12/25 15:04:33 $ package XML::RSS::Aggregate; $XML::RSS::Aggregate::VERSION = '0.02'; use strict; use XML::RSS; use base 'XML::RSS'; use Date::Parse; use LWP::Simple 'get'; use HTML::Entities 'encode_entities'; =head1 NAME XML::RSS::Aggregate - RSS Aggregator =head1 SYNOPSIS my $rss = XML::RSS::Aggregate->new( # parameters for XML::RSS->channel() title => 'Aggregated Examples', link => 'http://blog.elixus.org/', # parameters for XML::RSS::Aggregate->aggregate() sources => [ qw( http://one.example.com/index.rdf http://another.example.com/index.rdf http://etc.example.com/index.rdf ) ], sort_by => sub { $_[0]->{dc}{subject} # default to sort by dc:date }, uniq_by => sub { $_[0]->{title} # default to uniq by link } ); $rss->aggregate( sources => [ ... ] ); # more items $rss->save("all.rdf"); =head1 DESCRIPTION This module implements a subclass of B, adding a single C method that fetches other RSS feeds and add to the object itself. It handles the proper ordering and duplication removal for aggregated links. Also, the constructor C is modified to take arguments to pass implicitly to C and C methods. All the base methods are still applicable to this module; please see L for details. =head1 METHODS =over 4 =item aggregate (sources=>\@url, sort_by=>\&func, uniq_by=>\&func) This method fetches all RSS feeds listed in C<@url> and pass their items to the object's C. The optional C argument specifies the function to use for ordering RSS items; it defaults to sort them by their C<{dc}{date}> attribute (converted to absolute timestamps), with ties broken by their C<{link}> attribute. The optional C argument specifies the function to use for removing duplicate RSS items; it defaults to remove items that has the same C<{link}> value. =back =cut sub new { my ($class, %args) = @_; my $version = delete($args{version}) || '1.0'; my $self = $class->SUPER::new( version => $version ); my $sources = delete($args{sources}); my $sort_by = delete($args{sort_by}); $self->channel(%args) if %args; $self->aggregate( sources => $sources, sort_by => $sort_by, ) if $sources; return $self; } sub aggregate { my ($self, %args) = @_; my $sources = $args{sources} or return; my $sort_by = $args{sort_by} || sub { my $date = $_[0]->{dc}{date}; $date =~ s/:(\d\d)$/$1/ if $date; sprintf("%20s", str2time($date)).$_[0]->{link} }; my $uniq_by = $args{uniq_by} || sub { $_[0]->{link} }; my $old_items = $self->{items} || []; $self->{items} = []; my %saw; $self->add_item(%{$_->[0]}) for sort { $b->[1] cmp $a->[1] } grep { $_->[1] } map { [ $_ => scalar($sort_by->($_)) ] } grep { !$saw{$uniq_by->($_)}++ } @{$old_items}, map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_}; $_ } map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{dc}}; $_ } map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{syn}}; $_ } map { encode_entities($_, '&<>') for grep {!ref($_)} @{$_->{taxo}}; $_ } map { eval { (my $rss = XML::RSS->new)->parse(get($_)); @{$rss->{items}} } } grep { /^\w+:/ } @{$sources}; return $self; } 1; =head1 SEE ALSO L =head1 AUTHORS Autrijus Tang Eautrijus@autrijus.orgE =head1 COPYRIGHT Copyright 2002 by Autrijus Tang Eautrijus@autrijus.orgE. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L =cut __END__ # Local variables: # c-indentation-style: bsd # c-basic-offset: 4 # indent-tabs-mode: nil # End: # vim: expandtab shiftwidth=4: