# $Id: Parser.pm,v 1.13 2009/06/18 02:34:01 Martin Exp $ use strict; use warnings; =head1 NAME RDF::Simple::Parser - convert RDF string to bucket of triples =head1 DESCRIPTION A simple RDF/XML parser - reads a string containing RDF in XML returns a 'bucket-o-triples' (array of arrays) =head1 SYNOPSIS my $uri = 'http://www.zooleika.org.uk/bio/foaf.rdf'; my $rdf = LWP::Simple::get($uri); my $parser = RDF::Simple::Parser->new(base => $uri) my @triples = $parser->parse_rdf($rdf); # Returns an array of array references which are triples =head1 METHODS =over =cut package RDF::Simple::Parser; use constant DEBUG => 0; use File::Slurp; use LWP::UserAgent; use RDF::Simple::Parser::Handler; use XML::SAX qw(Namespaces Validation); our $VERSION = do { my @r = (q$Revision: 1.13 $ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r }; # Use a hash to implement objects of this type: use Class::MakeMethods::Standard::Hash ( new => 'new', scalar => [ qw( base http_proxy )], ); =item new( [ base => 'http://example.com/foo.rdf' ]) Create a new RDF::Simple::Parser object. 'base' supplies a base URI for relative URIs found in the document 'http_proxy' optionally supplies the address of an http proxy server. If this is not given, it will try to use the default environment settings. =cut =item parse_rdf($rdf) Accepts a string which is an RDF/XML document (complete XML, with headers) Returns an array of array references which are RDF triples. =cut sub parse_rdf { my ($self, $rdf) = @_; DEBUG && print STDERR " DDD Parser::parse_rdf()\n"; my $handler = new RDF::Simple::Parser::Handler(q{deprecated argument}, qnames => 1, base => $self->base, ); # Save (a reference to) our handler for future reference: $self->{_handler_} = $handler; my $factory = new XML::SAX::ParserFactory; $factory->require_feature(Namespaces); my $parser = $factory->parser(Handler => $handler); $parser->parse_string($rdf); my $res = $handler->result; return $res ? @$res : $res; } # parse_rdf =item parse_file($sFname) Takes one argument, a string which is a fully qualified filename. Reads the contents of that file, parses it as RDF, and returns the same thing as parse_rdf(). =cut sub parse_file { my $self = shift; my $sFname = shift || return; my $sRDF = read_file($sFname) || return; return $self->parse_rdf($sRDF); } # parse_file =item parse_uri($uri) Accepts a string which is a fully qualified http:// uri at which some valid RDF lives. Fetches the remote file and returns the same thing as parse_rdf(). =cut sub parse_uri { my $self = shift; my $uri = shift || return; my $rdf; eval { $rdf = $self->ua->get($uri)->content; }; warn ($@) if $@; if ($rdf && ($rdf ne q{})) { # print STDERR " DDD will parse_rdf(===$rdf===)\n"; $self->base($uri); return $self->parse_rdf($rdf); } # if } # parse_uri =item getns Returns a hashref of all namespaces found in the document. =cut sub getns { my $self = shift or return; my $handler = $self->{_handler_} or return; my $ns = $handler->ns or return; return $ns->{_lookup}; } # getns sub ua { my $self = shift; unless ($self->{_ua}) { $self->{_ua} = LWP::UserAgent->new(timeout => 30); if ($self->http_proxy) { $self->{_ua}->proxy('http',$self->http_proxy); } else { $self->{_ua}->env_proxy; } } return $self->{_ua}; } # ua =back =head1 BUGS Please report bugs via the RT web site L =head1 AUTHOR Jo Walsh Currently maintained by Martin Thurn =head1 LICENSE This module is available under the same terms as perl itself =cut 1; __END__