=head1 NAME
SVG::Metadata - Perl module to capture metadata info about an SVG file
=head1 SYNOPSIS
use SVG::Metadata;
my $svgmeta = new SVG::Metadata;
$svgmeta->parse($filename)
or die "Could not parse $filename: " . $svgmeta->errormsg();
$svgmeta2->parse($filename2)
or die "Could not parse $filename: " . $svgmeta->errormsg();
# Do the files have the same metadata (author, title, license)?
if (! $svgmeta->compare($svgmeta2) ) {
print "$filename is different than $filename2\n";
}
if ($svgmeta->title() eq '') {
$svgmeta->title('Unknown');
}
if ($svgmeta->author() eq '') {
$svgmeta->author('Unknown');
}
if ($svgmeta->license() eq '') {
$svgmeta->license('Unknown');
}
if (! $svgmeta->keywords()) {
$svgmeta->addKeyword('unsorted');
} elsif ($svgmeta->hasKeyword('unsorted') && $svgmeta->keywords()>1) {
$svgmeta->removeKeyword('unsorted');
}
print $svgmeta->to_text();
=head1 DESCRIPTION
This module provides a way of extracting, browsing and using RDF
metadata embedded in an SVG file.
The SVG spec itself does not provide any particular mechanisms for
handling metadata, but instead relies on embedded, namespaced RDF
sections, as per XML philosophy. Unfortunately, many SVG tools don't
support the concept of RDF metadata; indeed many don't support the idea
of embedded XML "islands" at all. Some will even ignore and drop the
rdf data entirely when encountered.
The motivation for this module is twofold. First, it provides a
mechanism for accessing this metadata from the SVG files. Second, it
provides a means of validating SVG files to detect if they have the
metadata.
The motivation for this script is primarily for the Open Clip Art
Library (http://www.openclipart.org), as a way of filtering out
submissions that lack metadata from being included in the official
distributions. A secondary motivation is to serve as a testing tool for
SVG editors like Inkscape (http://www.inkscape.org).
=head1 FUNCTIONS
=cut
package SVG::Metadata;
use 5.006;
use strict;
use warnings;
use XML::Twig;
use HTML::Entities;
# use Data::Dumper; # DEBUG
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = ();
our $VERSION = '0.28';
use fields qw(
_title
_description
_subject
_publisher
_publisher_url
_creator
_creator_url
_owner
_owner_url
_license
_license_date
_keywords
_language
_about_url
_date
_retain_xml
_strict_validation
_try_harder
_ERRORMSG
_RETAINED_XML
_RETAINED_DECLARATION
);
use vars qw( %FIELDS $AUTOLOAD );
=head2 new()
Creates a new SVG::Metadata object. Optionally, can pass in arguments
'title', 'author', 'license', etc..
my $svgmeta = new SVG::Metadata;
my $svgmeta = new SVG::Metadata(title=>'My title', author=>'Me', license=>'Public Domain');
=cut
sub new {
my $class = shift;
my %args = @_;
my $self = bless [\%FIELDS], $class;
while (my ($field, $value) = each %args) {
$self->{"_$field"} = $value
if (exists $FIELDS{"_$field"});
}
$self->{_creator} ||= $args{author} || '';
$self->{_language} ||= 'en';
$self->{_ERRORMSG} = '';
$self->{_strict_validation} = 0;
return $self;
}
# This automatically generates all the accessor functions for %FIELDS
sub AUTOLOAD {
my $self = shift;
my $attr = $AUTOLOAD;
$attr =~ s/.*:://;
return unless $attr =~ /[^A-Z]/; # skip DESTROY and all-cap methods
die "Invalid attribute method: ->$attr()\n" unless exists $FIELDS{"_$attr"};
$self->{"_$attr"} = shift if @_;
return $self->{"_$attr"};
}
=head2 author()
Alias for creator()
=cut
sub author {
my $self = shift;
return $self->creator(@_);
}
=head2 keywords_to_rdf()
Generates an rdf:Bag based on the data structure of keywords.
This can then be used to populate the subject section of the metadata.
I.e.:
$svgobj->subject($svg->keywords_to_rdf());
See:
http://www.w3.org/TR/rdf-schema/#ch_bag
http://www.w3.org/TR/rdf-syntax-grammar/#section-Syntax-list-element
http://dublincore.org/documents/2002/05/15/dcq-rdf-xml/#sec2
=cut
sub keywords_to_rdf {
my $self = shift;
my $text = '';
foreach my $keyword ($self->keywords()) {
$keyword = $self->esc_ents($keyword);
$text .= qq( $keyword\n);
}
if ($text ne '') {
return qq( \n$text );
} else {
return '';
}
}
=head2 errormsg()
Returns the last encountered error message. Most of the error messages
are encountered during file parsing.
print $svgmeta->errormsg();
=cut
sub errormsg {
my $self = shift;
return $self->{_ERRORMSG} || '';
}
=head2 parse($filename)
Extracts RDF metadata out of an existing SVG file.
$svgmeta->parse($filename) || die "Error: " . $svgmeta->errormsg();
This routine looks for a field in the rdf:RDF section of the document
named 'ns:Work' and then attempts to load the following keys from it:
'dc:title', 'dc:rights'->'ns:Agent', and 'ns:license'. If any are
missing, it
The $filename parameter can be a filename, or a text string containing
the XML to parse, or an open 'IO::Handle', or a URL.
Returns false if there was a problem parsing the file, and sets an
error message appropriately. The conditions under which it will return
false are as follows:
* No 'filename' parameter given.
* Filename does not exist.
* Document is not parseable XML.
* No rdf:RDF element was found in the document, and the try harder
option was not set.
* The rdf:RDF element did not have a ns:Work sub-element, and the
try_harder option was not set.
* Strict validation mode was turned on, and the document didn't
strictly comply with one or more of its extra criteria.
=cut
sub parse {
my ($self, $filename, %optn) = @_;
my $retaindecl;
# For backward-compatibility, support retain_xml as an option here:
if ($optn{retain_xml}) { $self->retain_xml($optn{retain_xml}); }
if (! defined($filename)) {
$self->{_ERRORMSG} = "No filename or text argument defined for parsing";
return;
}
my $twig = XML::Twig->new( map_xmlns => {
'http://www.w3.org/2000/svg' => "svg", # W3C's SVG namespace
'http://www.w3.org/1999/02/22-rdf-syntax-ns#' => "rdf", # W3C's metadata namespace
'http://purl.org/dc/elements/1.1/' => "dc", # Dublin Core metadata namespace
'http://web.resource.org/cc/' => "cc", # a license description namespace
},
pretty_print => 'indented',
comments => 'keep',
pi => 'keep',
keep_original_prefix => 1, # prevents superfluous svg:element prefixing.
);
if ($filename =~ m/\n.*\n/ || (ref $filename eq 'IO::Handle')) {
# Hmm, if it has newlines, it is likely to be a string instead of a filename
eval { $twig->parse($filename); };
if ($@) { $self->{_ERRORMSG} = "XML::Twig died; this may mean invalid XML."; return; }
if ($self->{_retain_xml}) {
($retaindecl) = $filename =~ /(.*?)(