# $Id: Serialiser.pm,v 1.12 2009-07-04 14:55:24 Martin Exp $ package RDF::Simple::Serialiser; use strict; use constant DEBUG => 0; =head1 NAME RDF::Simple::Serialiser - convert a list of triples to RDF =head1 DESCRIPTION A simple RDF serialiser. Accepts an array of triples, returns a serialised RDF document. =head1 SYNOPSIS my $ser = RDF::Simple::Serialiser->new( # OPTIONAL: Supply your own bNode id prefix: nodeid_prefix => 'a:', ); # OPTIONAL: Add your namespaces: $ser->addns( foaf => 'http://xmlns.com/foaf/0.1/', ); my $node1 = $ser->genid; my $node2 = $ser->genid; my @triples = ( ['http://example.com/url#', 'dc:creator', 'zool@example.com'], ['http://example.com/url#', 'foaf:Topic', '_id:1234'], ['_id:1234','http://www.w3.org/2003/01/geo/wgs84_pos#lat','51.334422'] [$node1, 'foaf:name', 'Jo Walsh'], [$node1, 'foaf:knows', $node2], [$node2, 'foaf:name', 'Robin Berjon'], [$node1, 'rdf:type', 'foaf:Person'], [$node2, 'rdf:type','http://xmlns.com/foaf/0.1/Person'] [$node2, 'foaf:url', \'http://server.com/NOT/an/rdf/uri.html'], ); my $rdf = $ser->serialise(@triples); ## Round-trip example: my $parser = RDF::Simple::Parser->new(); my $rdf = LWP::Simple::get('http://www.zooleika.org.uk/foaf.rdf'); my @triples = $parser->parse_rdf($rdf); my $new_rdf = $serialiser->serialise(@triples); =head1 METHODS =over =cut use Data::Dumper; use RDF::Simple::NS; use Regexp::Common qw(URI); use Class::MakeMethods::Standard::Hash ( new => 'new', scalar => [ qw( baseuri path nodeid_prefix qqq ) ], ); our $VERSION = do { my @r = (q$Revision: 1.12 $ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r }; =item new() =item new(nodeid_prefix => 'prefix') =cut =item serialise( @triples ) Accepts a 'bucket of triples' (an array of array references which are [subject, predicate, object] statements) and returns a serialised RDF document. If 'rdf:type' is not provided for a subject, the generic node type 'rdf:Description' is used. =cut sub serialise { my ($self,@triples) = @_; my %object_ids; foreach (@triples) { push @{$object_ids{$_->[0]}}, $_; } # foreach my @objects; foreach my $k (keys %object_ids) { push @objects, $self->_make_object(@{$object_ids{$k}}); } # foreach my %ns_lookup = $self->_ns->lookup; my %ns = (); my $used = $self->_used; foreach (keys %$used) { $ns{$_} = $ns_lookup{$_}; } # foreach my $xml = $self->render(\@objects, \%ns); return $xml; } # serialise =item serialize A synonym for serialise() for American users. =cut sub serialize { my $self = shift; return $self->serialise(@_); } # serialize # _make_object() is called on each subset of triples that have the # same subject. sub _make_object { my $self = shift; # Make a copy of our array-ref arguments, so we can modify them # locally: my @triples; foreach my $ra (@_) { push @triples, [@$ra]; } # foreach # DEBUG && print STDERR " DDD in _make_object(), triples is ", Dumper(\@triples); my $object; my $rdf = $self->_ns; # Convert the predicate of each triple into a legal qname: @triples = map {$_->[1] = $rdf->qname($_->[1]); $_} @triples; # Find the type declaration of this subject (assume there is only one): my ($class) = grep {$_->[1] eq 'rdf:type'} @triples; # DEBUG && print STDERR " DDD in _make_object(), class is ", Dumper($class); foreach my $t (@triples) { # Register the namespace of (all) the predicates: $self->_used($t->[1]); my $qn = $rdf->qname($t->[0]); if ($qn ne $t->[0]) { # Register the namespace of (all) the subject(s): $self->_used($qn); } # if } # foreach # $self->_used('rdf:Description'); if ($class) { # This bag of triples has a Class explicitly declared: $object->{Class} = $rdf->qname($class->[2]); } else { # This bag of triples needs a generic Description Class: $object->{Class} = 'rdf:Description'; } # Register the namespace of this subject's Class: $self->_used($object->{Class}); # Assign identifier as an arbitrary (but resolving) uri: my $id = $triples[0]->[0]; if ( $self->_looks_like_uri($id) || $self->_looks_like_legal_id($id) || (($id =~ m/^[#:]/) && $self->_looks_like_legal_id(substr($id,1))) ) { $object->{Uri} = $id; } # if else { # Delete non-alphanumeric characters: $id =~ s/\W//g; $object->{NodeId} = $id; } my $pref = $self->nodeid_prefix || '_id:'; STATEMENT: foreach my $statement (@triples) { next if $statement->[1] eq 'rdf:type'; my $obj = $statement->[2]; DEBUG && print STDERR " DDD start processing object($obj)\n"; if (ref $obj) { # Special case: insert this value as a string, no matter what it # looks like: push @{ $object->{literal}->{$statement->[1]} }, ${$obj}; } elsif ($obj =~ m/^$pref/) { $statement->[2] =~ s/\A[^a-zA-Z]/a/; $statement->[2] =~ s/\W//g; push @{ $object->{nodeid}->{$statement->[1]} }, $obj; } # if elsif ( $self->_looks_like_uri($obj) || $self->_looks_like_legal_id($obj) || ( ($obj =~ m/^[#:]/) && $self->_looks_like_legal_id(substr($obj, 1)) ) ) { push @{ $object->{resource}->{$statement->[1]} }, $obj; } else { push @{ $object->{literal}->{$statement->[1]} }, $obj; } } # foreach return $object; } # _make_object sub _looks_like_uri { my $self = shift; my $s = shift || ''; return ( ($s =~ m/$RE{URI}/) && # The URI we're interested in are specifically those URI # that can refer to an element of an ontology; these always # look like namespace#name ($s =~ m/.#./) ); } # _looks_like_uri sub _looks_like_legal_id { my $self = shift; my $s = shift || ''; return ( # Starts with alphanumeric: ($s =~ m/\A\w/) && # Only consists of alphanumerics plus a few punctuations. # I'm not sure what the correct set of characters is, even # after reading the RDF specification (it only refers to # full URIs): ($s =~ m/\A[-:_a-z0-9]+\z/) ); } # _looks_like_legal_id =item addns( qname => 'http://example.com/rdf/vocabulary#', qname2 => 'http://yetanother.org/vocabulary/' ) Use this method to add new namespaces to the RDF document. The RDF::Simple::NS module provides the following vocabularies by default (you can override them if you wish): foaf => 'http://xmlns.com/foaf/0.1/', dc => 'http://purl.org/dc/elements/1.1/', rdfs => 'http://www.w3.org/2000/01/rdf-schema#', daml => 'http://www.w3.org/2001/10/daml+oil#', space => 'http://frot.org/space/0.1/', geo => 'http://www.w3.org/2003/01/geo/wgs84_pos#', rdf => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', owl => 'http://www.w3.org/2002/07/owl#', ical => 'http://www.w3.org/2002/12/cal/ical#', dcterms => 'http://purl.org/dc/terms/', wiki => 'http://purl.org/rss/1.0/modules/wiki/', chefmoz => 'http://chefmoz.org/rdf/elements/1.0/', =cut sub addns { my $self = shift; my %p; if (ref $_[0] eq 'HASH') { %p = %{$_[0]}; } else { %p = @_; } return $self->_ns->lookup(%p); } # addns =item genid( ) generates a random identifier for use as a bNode (anonymous node) nodeID. if nodeid_prefix is set, the generated id uses the prefix, followed by 8 random numbers. =cut sub genid { my $self = shift; my $prefix = $self->nodeid_prefix || '_id:'; my @num = (0..9); my $string = join '', (map { @num[rand @num] } 0..7); return $prefix.$string; } # genid sub _ns { my $self = shift; return $self->{_rdfns} if $self->{_rdfns}; $self->{_rdfns} = RDF::Simple::NS->new; } # _ns sub _used { my ($self, $uri) = @_; if (defined $uri and ($uri !~ m/^http/)) { my $pref = $self->_ns->prefix($uri); $self->{_used_entities}->{ $pref } = 1 if $pref; } return $self->{_used_entities}; } # _used =item render Does the heavy lifting of converting the "objects" to a string. Users of this module should call serialize(); Subclassers of this module will probably rewrite render(). =cut sub render { my ($self, $objects, $ns) = @_; my $xml = "{Class}; if ($object->{Uri}) { $xml .= ' rdf:about="'.$object->{Uri}.'"'; } # if else { $xml .= ' rdf:nodeID="'.$object->{NodeId}.'"'; } $xml .= ">\n"; LITERAL: foreach my $l (keys %{$object->{literal}}) { LITERAL_PROP: foreach my $prop (@{$object->{literal}->{$l}}) { $prop = _xml_escape($prop); $xml .= qq{<$l>$prop\n}; } # foreach LITERAL_PROP } # foreach LITERAL RESOURCE: foreach my $l (keys %{$object->{resource}}) { RESOURCE_PROP: foreach my $prop (@{$object->{resource}->{$l}}) { $xml .= qq{<$l rdf:resource="$prop"/>\n}; } # foreach RESOURCE_PROP } # foreach RESOURCE NODEID: foreach my $l (keys %{$object->{nodeid}}) { NODEID_PROP: foreach my $prop (@{$object->{nodeid}->{$l}}) { $xml .= qq{<$l rdf:nodeID="$prop"/>\n}; } # foreach NODEID_PROP } # foreach NODEID $xml .= '{Class} .">\n"; } # foreach OBJECT $xml .= "\n"; return $xml; } # render sub _xml_escape { my $s = shift || ''; # Make safe for XML: my %escape = ( q'<' => q'<', q'>' => q'>', q'&' => q'&', # ', # Emacs bug q'"' => q'"', ); my $escape_re = join(q'|', keys %escape); $s =~ s/($escape_re)/$escape{$1}/g; return $s; } # _xml_escape =back =head1 BUGS Please report bugs via the RT web site L =head1 NOTES The original author was British, so this is a Serialiser. For American programmers, RDF::Simple::Serializer will work as an alias to the module, and serialize() does the same as serialise(). The distinction between a URI and a literal string in the "object" (third element) of each triple is made as follows: if the object is a reference, it is output as a literal; if the object "looks like" a URI (according to Regexp::Common::URI), it is output as a URI. =head1 THANKS Thanks particularly to Tom Hukins, and also to Paul Mison, for providing patches. =head1 AUTHOR Originally written by Jo Walsh (formerly ). Currently maintained by Martin Thurn . =head1 LICENSE This module is available under the same terms as perl itself. =cut 1; __END__