#!/usr/bin/perl require 5.008001; # no good Unicode support? you lose package XML::Atom::SimpleFeed; $VERSION = "0.86"; use warnings FATAL => 'all'; use strict; use Carp; use Encode (); use POSIX (); sub ATOM_NS () { 'http://www.w3.org/2005/Atom' } sub XHTML_NS () { 'http://www.w3.org/1999/xhtml' } sub PREAMBLE () { qq(\n) } sub W3C_DATETIME () { '%Y-%m-%dT%H:%M:%SZ' } sub DEFAULT_GENERATOR () { { uri => 'http://search.cpan.org/dist/' . join( '-', split /::/, __PACKAGE__ ) . '/', version => __PACKAGE__->VERSION, name => __PACKAGE__, } } #################################################################### # superminimal XML writer # my %XML_ESC = ( "\xA" => ' ', "\xD" => ' ', '"' => '"', '&' => '&', "'" => ''', '<' => '<', '>' => '>', ); sub xml_cref { Encode::encode 'us-ascii', $_[ 0 ], Encode::HTMLCREF } sub xml_escape { $_[ 0 ] =~ s{ ( [<>&'"] ) }{ $XML_ESC{ $1 } }gex; goto &xml_cref; } sub xml_attr_escape { $_[ 0 ] =~ s{ ( [\x0A\x0D<>&'"] ) }{ $XML_ESC{ $1 } }gex; goto &xml_cref; } sub xml_cdata_flatten { for ( $_[0] ) { my $cdata_content; s{}{ xml_escape $cdata_content = $1 }ge; croak 'Incomplete CDATA section' if -1 < index $_, '[ $i ] . '="' . xml_attr_escape( $name->[ $i + 1 ] ) . '"'; $i += 2; } $name = $name->[ 0 ]; } @_ ? join( '', "<$name$attr>", @_, "" ) : "<$name$attr/>"; } #################################################################### # misc utility functions # sub natural_enum { my @and; unshift @and, pop @_ if @_; unshift @and, join ', ', @_ if @_; join ' and ', @and; } sub permalink { my ( $link_arg ) = ( @_ ); if( ref $link_arg ne 'HASH' ) { return $link_arg; } elsif( not exists $link_arg->{ rel } or $link_arg->{ rel } eq 'alternate' ) { return $link_arg->{ href }; } return; } #################################################################### # actual implementation of RFC 4287 # sub simple_construct { my ( $name, $content ) = @_; xml_tag $name, xml_escape $content; } sub person_construct { my ( $name, $arg ) = @_; my $prop = 'HASH' ne ref $arg ? { name => $arg } : $arg; croak "name required for $name element" if not exists $prop->{ name }; return xml_tag $name => ( map { xml_tag $_ => xml_escape $prop->{ $_ } } grep { exists $prop->{ $_ } } qw( name email uri ) ); } sub text_construct { my ( $name, $arg ) = @_; my ( $type, $content ); if( ref $arg eq 'HASH' ) { # FIXME doesn't support @src attribute for $name eq 'content' yet $type = exists $arg->{ type } ? $arg->{ type } : 'html'; croak "content required for $name element" unless exists $arg->{ content }; # a lof of the effort that follows is to omit the type attribute whenever possible # if( $type eq 'xhtml' ) { $content = xml_string $arg->{ content }; if( $content !~ / xmlns => XHTML_NS ], $content; } } elsif( $type eq 'html' or $type eq 'text' ) { $content = xml_escape $arg->{ content }; } else { croak "type '$type' not allowed in $name element" if $name ne 'content'; # FIXME non-XML/text media types must be base64 encoded! $content = xml_string $arg->{ content }; } } else { $type = 'html'; $content = xml_escape $arg; } if( $type eq 'html' and $content !~ /&/ ) { $type = 'text'; $content =~ s/[\n\t]+/ /g; } return xml_tag [ $name => $type ne 'text' ? ( type => $type ) : () ], $content; } sub empty_tag_maker { my ( $required_attr, @optional_attr ) = @_; sub { my ( $name, $arg ) = @_; if ( $name eq 'link' ) { # HACK: no other simple tag needs similar features so it's easiest # to hack them in here instead of providing an indirection # croak "link '$arg->{ href }' is not a valid URI" # if $arg->{ href } XXX TODO # omit atom:link/@rel value when possible delete $arg->{ rel } if ref $arg eq 'HASH' and exists $arg->{ rel } and $arg->{ rel } eq 'alternate'; } if( ref $arg eq 'HASH' ) { croak "$required_attr required for $name element" if not exists $arg->{ $required_attr }; my @attr = map { $_ => $arg->{ $_ } } grep exists $arg->{ $_ }, $required_attr, @optional_attr; xml_tag [ $name => @attr ]; } else { xml_tag [ $name => $required_attr => $arg ]; } } } # tag makers are called with the name of the tag they're supposed to handle as the first parameter my %make_tag = ( icon => \&simple_construct, id => \&simple_construct, logo => \&simple_construct, published => \&simple_construct, updated => \&simple_construct, author => \&person_construct, contributor => \&person_construct, title => \&text_construct, subtitle => \&text_construct, rights => \&text_construct, summary => \&text_construct, content => \&text_construct, link => empty_tag_maker( qw( href rel type title hreflang length ) ), category => empty_tag_maker( qw( term scheme label ) ), generator => sub { my ( $name, $arg ) = @_; if( ref $arg eq 'HASH' ) { croak 'name required for generator element' if not exists $arg->{ name }; my $content = delete $arg->{ name }; xml_tag [ generator => map +( $_ => $arg->{ $_ } ), grep exists $arg->{ $_ }, qw( uri version ) ], xml_escape( $content ); } else { xml_tag generator => xml_escape( $arg ); } }, ); sub container_content { my ( $name, %arg ) = @_; my ( $elements, $required, $optional, $singular, $deprecation, $callback ) = @arg{ qw( elements required optional singular deprecate callback ) }; my ( $content, %permission, %count, $permalink ); undef @permission{ @$required, @$optional }; # populate while( my ( $elem, $arg ) = splice @$elements, 0, 2 ) { if( exists $permission{ $elem } ) { $content .= $make_tag{ $elem }->( $elem, $arg ); ++$count{ $elem }; } else { croak "Unknown element $elem"; } if( $elem eq 'link' and defined ( my $alt = permalink $arg ) ) { $permalink = $alt unless $count{ 'alternate link' }++; } if( exists $callback->{ $elem } ) { $callback->{ $elem }->( $arg ) } if( not @$elements ) { # end of input? # we would normally fall off the bottom of the loop now; # before that happens, it's time to defaultify stuff and # put it in the input so we will keep going for a little longer if( not $count{ id } and defined $permalink ) { carp 'Falling back to alternate link as id'; push @$elements, id => $permalink; } if( not $count{ updated } ) { push @$elements, updated => $arg{ default_upd }; } } } my @error; my @missing = grep { not exists $count{ $_ } } @$required; my @toomany = grep { ( $count{ $_ } || 0 ) > 1 } 'alternate link', @$singular; push @error, 'requires at least one ' . natural_enum( @missing ) . ' element' if @missing; push @error, 'must have no more than one ' . natural_enum( @toomany ) . ' element' if @toomany; croak $name, ' ', join ' and ', @error if @error; return $content; } #################################################################### # implementation of published interface and rest of RFC 4287 # sub XML::Atom::SimpleFeed::new { my $self = bless {}, shift; my $arg = ( @_ and 'HASH' eq ref $_[0] ) ? shift : {}; $self->{ do_add_generator } = 1; $self->feed( @_ ) if @_; # support old-style invocation return $self; } sub XML::Atom::SimpleFeed::feed { my $self = shift; $self->{ meta } = container_content feed => ( elements => \@_, required => [ qw( id title updated ) ], optional => [ qw( author category contributor generator icon logo link rights subtitle ) ], singular => [ qw( generator icon logo id rights subtitle title updated ) ], callback => { author => sub { $self->{ have_default_author } = 1 }, updated => sub { $self->{ global_updated } = $_[ 0 ] }, generator => sub { $self->{ do_add_generator } = 0 }, }, default_upd => POSIX::strftime( W3C_DATETIME, gmtime ), ); return $self; } sub XML::Atom::SimpleFeed::add_entry { my $self = shift; my @required = qw( id title updated ); my @optional = qw( category content contributor link published rights summary ); push @{ $self->{ have_default_author } ? \@optional : \@required }, 'author'; # FIXME # # o atom:entry elements that contain no child atom:content element # MUST contain at least one atom:link element with a rel attribute # value of "alternate". # # o atom:entry elements MUST contain an atom:summary element in either # of the following cases: # * the atom:entry contains an atom:content that has a "src" # attribute (and is thus empty). # * the atom:entry contains content that is encoded in Base64; # i.e., the "type" attribute of atom:content is a MIME media type # [MIMEREG], but is not an XML media type [RFC3023], does not # begin with "text/", and does not end with "/xml" or "+xml". push @{ $self->{ entries } }, xml_tag entry => container_content entry => ( elements => \@_, required => \@required, optional => \@optional, singular => [ qw( content id published rights summary ) ], default_upd => $self->{ global_updated }, ); return $self; } sub XML::Atom::SimpleFeed::no_generator { my $self = shift; $self->{ do_add_generator } = 0; return $self; } sub XML::Atom::SimpleFeed::as_string { my $self = shift; if( $self->{ do_add_generator } ) { $self->{ meta } .= $make_tag{ generator }->( generator => DEFAULT_GENERATOR ); $self->{ do_add_generator } = 0; } PREAMBLE . xml_tag [ feed => xmlns => ATOM_NS ], $self->{ meta }, @{ $self->{ entries } }; } sub XML::Atom::SimpleFeed::print { my $self = shift; my ( $handle ) = @_; local $, = local $\ = ''; defined $handle ? print $handle $self->as_string : print $self->as_string; } sub XML::Atom::SimpleFeed::save_file { croak q{no longer supported, use 'print' instead and pass in a filehandle} } ! ! 'Funky and proud of it.'; __END__ =head1 NAME XML::Atom::SimpleFeed - No-fuss generation of Atom syndication feeds =head1 VERSION This document describes XML::Atom::SimpleFeed version 0.84 =head1 SYNOPSIS use XML::Atom::SimpleFeed; my $feed = XML::Atom::SimpleFeed->new( title => 'Example Feed', link => 'http://example.org/', link => { rel => 'self', href => 'http://example.org/atom', }, updated => '2003-12-13T18:30:02Z', author => 'John Doe', id => 'urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6', ); $feed->add_entry( title => 'Atom-Powered Robots Run Amok', link => 'http://example.org/2003/12/13/atom03', id => 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a', summary => 'Some text.', updated => '2003-12-13T18:30:02Z', category => 'Atom', category => 'Miscellaneous', ); $feed->print; =head1 DESCRIPTION This module provides a minimal API for generating Atom syndication feeds quickly and easily. It supports all aspects of the Atom format, but has no provisions for generating feeds with extension elements. You can supply strings for most things, and the module will provide useful defaults. When you want more control, you can provide data structures, as documented, to specify more particulars. =head1 INTERFACE =head2 C XML::Atom::SimpleFeed instances are created by the C constructor, which takes a list of key-value pairs as parameters. The keys are used to create the corresponding L<"Atom elements"|/ATOM ELEMENTS>. The following elements are available: =over =item * L> (I) =item * L> (I, multiple) =item * L> (B) =item * L> (optional, multiple) =item * L> (optional, multiple) =item * L> (optional, multiple) =item * L> (optional) =item * L> (optional) =item * L> (optional) =item * L> (optional) =item * L> (optional) =item * L> (optional) =back To specify multiple instances of an element that may be given multiple times, simply list multiple key-value pairs with the same key. =head2 C This method adds an entry into the Atom feed. It takes a list of key-value pairs as parameters. The keys are used to create the corresponding L<"Atom Elements"|/ATOM ELEMENTS>. The following elements are available: =over =item * L> (B unless there is a feed-level author, multiple) =item * L> (I) =item * L> (B, multiple) =item * L> (B) =item * L> (optional, multiple) =item * L> (optional) =item * L> (optional, multiple) =item * L> (optional) =item * L> (optional) =item * L> (optional) =item * L> (optional) =back To specify multiple instances of an element that may be given multiple times, simply list multiple key-value pairs with the same key. =head2 C Suppresses the output of a default C element. It is not necessary to call this method if you supply a custom C element. =head2 C Returns the XML representation of the feed as a string. =head2 C Outputs the XML representation of the feed to a handle which should be passed as a parameter. Defaults to C if you do not pass a handle. =head1 ATOM ELEMENTS =head2 C A L denoting the author of the feed or entry. If you supply at least one author for the feed, you can omit this information from entries; the feed's author(s) will be assumed as the author(s) for those entries. If you do not supply any author for the feed, you B supply one for each entry. =head2 C One or more categories that apply to the feed or entry. You can supply a string which will be used as the category term. The full range of details that can be provided by passing a hash instead of a string is as follows: =over =item C (B) The category term. =item C (optional) A URI that identifies a categorization scheme. It is common to provide the base of some kind of by-category URL here. F.ex., if the weblog C can be browsed by category using URLs such as C, you would supply C as the scheme and, in that case, C as the term. =item C