package Data::Microformat::hFeed::hEntry; use strict; use base qw(Data::Microformat); use Data::Microformat::hCard; use DateTime::Format::W3CDTF; sub class_name { "hentry" } sub singular_fields { qw(id base geo title content summary modified issued link author) } sub plural_fields { qw(tags) } sub from_tree { my $class = shift; my $tree = shift; my $url = shift; my @entries; foreach my $entry_tree ($tree->look_down('class', qr/hentry/)) { push @entries, $class->_convert($entry_tree, $url); } return wantarray ? @entries : $entries[0]; } sub _convert { my $class = shift; my $tree = shift; my $url = shift; my $entry = $class->new; $entry->{_no_dupe_keys} = 1; $entry->base($url) if $url; my $title; my $id; $tree->look_down(sub { my $bit = shift; my $entry_class = $bit->attr('class') || $bit->attr('rel') || $bit->tag || return 0; # TODO # Summary and Content as html if (!$entry_class) { return 0; } elsif (_match($entry_class, 'hentry')) { $entry->id($bit->attr('id')); } elsif (_match($entry_class, 'entry-title')) { $entry->title($bit->as_text); $entry->id($bit->attr('id')); } elsif ($entry_class =~ m!^h\d!) { $title = $bit->as_text unless defined $title; $id = $bit->attr('id') unless defined $id; } elsif (_match($entry_class, 'entry-summary')) { $entry->summary($class->_get_child_html_from_element($bit)); } elsif (_match($entry_class, 'entry-content')) { $entry->content($class->_get_child_html_from_element($bit)); } elsif (_match($entry_class, 'published')) { $entry->issued(_do_date($bit)); } elsif (_match($entry_class, 'modified')) { $entry->modified(_do_date($bit)); } elsif (_match($entry_class,'vcard')) { my $card = Data::Microformat::hCard->from_tree($bit); $entry->author($card); } elsif (_match($entry_class, 'geo')) { $bit->detach; my $geo = Data::Microformat::geo->from_tree($bit, $url); $entry->geo($geo); } elsif (_match($entry_class, 'bookmark')) { $entry->link($class->_url_decode($bit->attr('href'))); } elsif (_match($entry_class, 'tag')) { $entry->tags($bit->as_text); } else { # print "Unknown class $entry_class\n"; } return 0; }); # Use first h tag we found if title or id isn't set $entry->title($title) if defined $title; $entry->title($id) if defined $id; $entry->{_no_dupe_keys} = 0; return $entry; } sub _do_date { my $element = shift; my $title = $element->attr('title') || return; $title =~ s!(([+-])(\d+))$!$2.join(":", grep {length} split /(\d\d)/, $3)!e; # egregious hack to work with malformed dates my $dt = eval { DateTime::Format::W3CDTF->parse_datetime($title) }; print "Aargh: $@\n".caller()."\n" if $@; return $dt; } sub _match { my $field = shift || return 0; my $target = shift; return $field =~ m!(^|\s*)$target(\s*|$)!i; } sub _to_hcard_elements { my $entry = shift; my $root = HTML::Element->new('div', class => 'hentry entry'); # id $root->attr('id', $entry->id) if $entry->id; # title if ($entry->title) { my $title = HTML::Element->new('h3', class => 'entry-title'); if ($entry->link) { my $a = HTML::Element->new('a', href => $entry->link, rel => 'bookmark', title => $entry->title ); $a->push_content($entry->title); $title->push_content($a); } else { $title->push_content($entry->title); } $root->push_content($title); } # summary # content foreach my $attr (qw(summary content)) { next unless $entry->$attr; my $div = HTML::Element->new('div', class => "entry-$attr"); $div->push_content($entry->$attr); $root->push_content($div); } # post info my $post_info = HTML::Element->new('ul'); my $saw_something = 0; # published # modified foreach my $date (qw(issued modified)) { my $val = $entry->$date || next; $saw_something++; my $name = ($date eq 'issued') ? 'published' : $date; my $li = HTML::Element->new('li'); $li->push_content(ucfirst($name).": "); my $abbr = HTML::Element->new('abbr', class => $name, title => DateTime::Format::W3CDTF->format_datetime($entry->$date)); $abbr->push_content($entry->$date->strftime("%B %d, %Y")); $li->push_content($abbr); $post_info->push_content($li); } # author # geo foreach my $attr (qw(author geo)) { next unless $entry->$attr; $saw_something++; my $li = HTML::Element->new('li'); $li->push_content($entry->$attr->_to_hcard_elements); $post_info->push_content($li); } $root->push_content($post_info) if $saw_something; return $root; } 1; __END__ =head1 NAME Data::Microformat::hFeed::hEntry - A module to parse and create hEntries =head1 SYNOPSIS use Data::Microformat::hFeed::hEntry; my $entry = Data::Microformat::hFeed:hEntry->parse($a_web_page); print "Entry title is ".$entry->title; print "Entry author is ".$entry->author->fullname; # Create a new entry from scratch my $entry = Data::Microformat::hFeed::hEntry->new; $entry->id(rand().time().$$); $entry->title("A title"); $entry->link("http://example.com/989691066"); $entry->summary("A summary"); $entry->content("Somebody did something"); $entry->issued(DateTime->now); $entry->modified(DateTime->now); foreach my $tag (qw(tag1 tag2 tag3)) { $entry->tags($tags); } $entry->author($hcard); =head1 DESCRIPTION An hEntry is a microformat used in hFeeds. This module exists both to parse existing hEntires from web pages, and to create new hEntries so that they can be put onto the Internet. To use it to parse an existing hEntry (or hEntries), simply give it the content of the page containing them (there is no need to first eliminate extraneous content, as the module will handle that itself): my $entry = Data::Microformat::hFeed::hEntry->parse($content); If you would like to get all the entries on the webpage, simply ask using an array: my @entries = Data::Microformat::hFeed::hEntry->parse($content); To create a new hEntry, first create the new object: my $entry = Data::Microformat::hFeed::hEntry->new; Then use the helper methods to add any data you would like. When you're ready to output in the hEntry HTML format, simply write my $output = $entry->to_html; And $output will be filled with an hEntry representation, using