package TV::Anytime; use strict; use warnings; use DateTime; use DateTime::Format::ISO8601; use DateTime::Format::Duration; use File::Find::Rule; use List::Util; use Path::Class; use TV::Anytime::Event; use TV::Anytime::Genre; use TV::Anytime::Group; use TV::Anytime::Program; use TV::Anytime::Service; use XML::LibXML; use XML::LibXML::XPathContext; use base 'Class::Accessor::Chained::Fast'; __PACKAGE__->mk_accessors(qw(directory)); our $VERSION = '0.31'; sub new { my $class = shift; my $directory = shift; die "$directory not a directory" unless -d $directory; die "$directory does not contain ServiceInformation.xml" unless -f file($directory, "ServiceInformation.xml"); my $self = {}; bless $self, $class; $self->directory($directory); return $self; } sub _find_files { my ($self, $id, $type) = @_; my @files = File::Find::Rule->file->name("*${id}_${type}.xml")->in($self->directory); return sort @files; } sub _programs { my ($self, $id) = @_; my @programs = $self->_program_information($id); my @events = $self->_program_location($id); my %programs; $programs{ $_->id } = $_ foreach @programs; my %events; foreach my $event (@events) { $event->program($programs{ $event->crid }); push @{ $events{ $event->crid } }, $event; } foreach my $program (@programs) { $program->events_ref($events{ $program->id }); } return \@programs, \@events; } sub _program_information { my ($self, $id) = @_; my @programs; foreach my $file ($self->_find_files($id, "pi")) { push @programs, $self->_program_information_single($id, $file); } return @programs; } my %flags = ( 'AD' => 'is_audio_described', 'S' => 'is_subtitled', 'SL' => 'is_deaf_signed', ); sub _program_information_single { my ($self, $id, $filename) = @_; my $xpc = $self->_parse_file($filename); my @programs; foreach my $node ($xpc->findnodes("//tva:ProgramInformation")) { my $program = TV::Anytime::Program->new; $program->id($node->getAttribute('programId')); $program->title($xpc->findvalue(".//tva:Title", $node)); $program->synopsis($xpc->findvalue(".//tva:Synopsis[attribute::length='short']", $node)); $program->synopsis_long($xpc->findvalue(".//tva:Synopsis[attribute::length='long']", $node)); # clean up synopsis foreach my $s (qw(synopsis synopsis_long)) { my $synopsis = $program->$s; $synopsis =~s /^(CBeebies:?|CBBC|\[Ages? \d+-\d+\])\.? //; # fix title when title is Julian Fellowes Investigates... # and synopsis is ...a Most Mysterious Murder. The Case of etc. if ($synopsis =~ s/^\.\.\. ?//) { my $title = $program->title; $title =~ s/\.\.\.//; $synopsis =~ s/^(.+?)\. //; if ($1) { $title .= ' ' . $1; $title =~ s/ {2,}/ /; $program->title($title); } } $program->$s($synopsis); } # extract audio described / subtitled / deaf_signed from synopsis foreach my $s (qw(synopsis synopsis_long)) { my $synopsis = $program->$s; next unless $synopsis =~ s/\[([A-Z,]+)\]//; my $flags = $1; foreach my $flag (split ",", $flags) { my $method = $flags{$flag} || next; # bad data $program->$method(1); } $program->$s($synopsis); } $program->caption_language( $xpc->findvalue(".//tva:CaptionLanguage", $node)); $program->audio_channels($xpc->findvalue(".//tva:NumOfChannels", $node)); $program->aspect_ratio($xpc->findvalue(".//tva:AspectRatio", $node)); my @member_of; foreach my $subnode ($self->_xpc($node)->findnodes(".//tva:MemberOf")) { push @member_of, $subnode->getAttribute('crid'); } $program->member_of(\@member_of); my @genres; foreach my $subnode ($self->_xpc($node)->findnodes(".//tva:Genre")) { my $href = $subnode->getAttribute('href'); $href =~ s/^urn:tva:metadata:cs:(.+?):.+$/$1/; push @genres, TV::Anytime::Genre->new( { name => $href, value => $self->_xpc($subnode)->findvalue("./tva:Name"), } ); } $program->genres_ref(\@genres); push @programs, $program; } return @programs; } sub _program_location { my ($self, $id) = @_; my @events; foreach my $file ($self->_find_files($id, "pl")) { push @events, $self->_program_location_single($id, $file); } return @events; } sub _program_location_single { my ($self, $id, $filename) = @_; my $xpc = $self->_parse_file($filename); my @events; foreach my $node ($xpc->findnodes("//tva:ScheduleEvent")) { my $nodexpc = $self->_xpc($node); my $event = TV::Anytime::Event->new; $event->crid($nodexpc->findnodes("./tva:Program", $node)->get_node(0) ->getAttribute('crid')); $event->start( $self->_parse_date($nodexpc->findvalue('./tva:PublishedStartTime'))); my $duration = $self->_parse_duration($nodexpc->findvalue('./tva:PublishedDuration')); $event->stop($event->start + $duration); # warn $event->crid . ": " . $event->start->datetime . " -> " . $event->stop->datetime . "\n" if $event->start->datetime =~ /2005-08-.?.?T07:00/; # eq 'crid://bbc.co.uk/277092412' #or $event->crid eq 'crid://bbc.co.uk/277092882'; push @events, $event; } return @events; } sub groups { my $self = shift; my @services; my $xpc = $self->_parse_file("groups_cr.xml"); my ($members, $parents); foreach my $node ($xpc->findnodes("//cr:Result")) { my $id = $node->getAttribute("CRID"); my @members = map { $_->textContent } $self->_xpc($node)->findnodes(".//cr:Crid"); $members->{$id} = \@members; push @{ $parents->{$_} }, $id foreach @members; } $xpc = $self->_parse_file("groups_gr.xml"); my @groups; foreach my $node ($xpc->findnodes("//tva:GroupInformation")) { my $id = $node->getAttribute("groupId"); my $members = $members->{$id}; next unless $members; push @groups, TV::Anytime::Group->new( { id => $id, type => $self->_xpc($node)->findnodes("./tva:GroupType")->[0] ->getAttribute("value"), title => $self->_xpc($node)->findvalue(".//tva:Title"), members_ref => $members, parents_ref => $parents->{$id}, } ); } return @groups; } sub services { my $self = shift; my @services; my $xpc = $self->_parse_file("ServiceInformation.xml"); foreach my $node ($xpc->findnodes("//tva:ServiceInformation")) { my @genres; foreach my $subnode ($self->_xpc($node)->findnodes("./tva:ServiceGenre")) { my $href = $subnode->getAttribute('href'); $href =~ s/^urn:tva:metadata:cs:(.+?):.+$/$1/; push @genres, TV::Anytime::Genre->new( { name => $href, value => $self->_xpc($subnode)->findvalue("./tva:Name"), } ); } push @services, TV::Anytime::Service->new( { anytime => $self, id => $node->getAttribute('serviceId'), name => $xpc->findvalue("./tva:Name", $node), owner => $xpc->findvalue("./tva:Owner", $node), logo => $xpc->findvalue("./tva:Logo", $node), genres_ref => \@genres, } ); } return @services; } sub services_television { my $self = shift; return grep { $_->is_television } $self->services; } sub services_radio { my $self = shift; return grep { $_->is_radio } $self->services; } sub _parse_file { my ($self, $filename) = @_; my $directory = $self->directory; my $path = $filename; $path = dir($self->directory, $filename) unless $filename =~ /$directory/; my $parser = XML::LibXML->new; my $doc = $parser->parse_file($path); return $self->_xpc($doc); } sub _xpc { my ($self, $node) = @_; my $xpc = XML::LibXML::XPathContext->new($node); $xpc->registerNs('tva', 'urn:tva:metadata:2002'); $xpc->registerNs('rss', 'http://purl.org/rss/1.0/'); $xpc->registerNs('cr', 'http://www.tv-anytime.org/2002/02/ContentReferencing'); return $xpc; } sub _parse_date { my ($self, $string) = @_; my $dt = DateTime::Format::ISO8601->parse_datetime($string); return $dt; } sub _parse_duration { my ($self, $string) = @_; my $d = DateTime::Format::Duration->new(pattern => 'PT%HH%MM%SS',); return $d->parse_duration($string); } 1; __END__ =head1 NAME TV::Anytime - Parse TV-AnyTime bundles of TV and Radio listings =head1 SYNOPSIS use TV::Anytime; my $tv = TV::Anytime->new("data/20050701/"); # Find out what services are available my @services = $tv->services; my @radio_services = $tv->services_radio; my @tv_services = $tv->services_television; my @groups = $tv->groups; =head1 DESCRIPTION The L module parses TV-Anytime bundles. TV-Anytime is a format organised by the TV-Anytime Forum (L). These are open standards (see ETSI TS102822) for the rich description of Radio, Television and other types of media. The metadata specification includes a comprehensive genre scheme, methods of linking and grouping programmes, listing credits and lots of other data fields. This module is concerned with parsing TV-Anytime files that are shipped by the British Broadcasting Corporation from L. It is assumed that you have downloaded a .tar.gz from this site and have unpacked it. =head1 METHODS =head2 new() The new() method is the constructor. It takes the directory into which you have unpacked the TV-Anytime files: my $tv = TV::Anytime->new("data/20050701/"); =head2 groups The groups() method returns a list of all the available groups as a list of L objects: my @groups = $tv->groups; =head2 services The services() method returns a list of all the available services as a list of L objects: my @services = $tv->services; =head2 services_radio The services_radio() method returns a list of the available radio services as a list of L objects: my @radio_services = $tv->services_radio; =head2 services_television The serviices_television() method returns a list of all the available television services as a list of L objects: my @tv_services = $tv->services_television; =head1 SEE ALSO L =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. =head1 AUTHOR Leon Brocard C =head1 LICENCE AND COPYRIGHT Copyright (c) 2005, Leon Brocard C. All rights reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.