# RDF::Query # ----------------------------------------------------------------------------- =head1 NAME RDF::Query - An RDF query implementation of SPARQL/RDQL in Perl for use with RDF::Trine, RDF::Redland, and RDF::Core. =head1 VERSION This document describes RDF::Query version 2.200, released 6 August 2009. =head1 SYNOPSIS my $query = new RDF::Query ( $rdql, undef, undef, 'rdql' ); my @rows = $query->execute( $model ); my $query = new RDF::Query ( $sparql ); my $iterator = $query->execute( $model ); while (my $row = $iterator->next) { print $row->{ var }->as_string; } =head1 DESCRIPTION RDF::Query allows RDQL and SPARQL queries to be run against an RDF model, returning rows of matching results. See L for more information on SPARQL. See L for more information on RDQL. =head1 CHANGES IN VERSION 2.000 There are many changes in the code between the 1.x and 2.x releases. Most of these changes will only affect queries that should have raised errors in the first place (SPARQL parsing, queries that use undefined namespaces, etc.). Beyond these changes, however, there are some significant API changes that will affect all users: =over 4 =item Use of RDF::Trine objects All nodes and statements returned by RDF::Query are now RDF::Trine objects (more specifically, RDF::Trine::Node and RDF::Trine::Statement objects). This differes from RDF::Query 1.x where nodes and statements were of the same type as the underlying model (Redland nodes from a Redland model and RDF::Core nodes from an RDF::Core model). In the past, it was possible to execute a query and not know what type of nodes were going to be returned, leading to overly verbose code that required examining all nodes and statements with the bridge object. This new API brings consistency to both the execution model and client code, greatly simplifying interaction with query results. =item Binding Result Values Binding result values returned by calling C<< $iterator->next >> are now HASH references (instead of ARRAY references), keyed by variable name. Where prior code might use this code (modulo model definition and namespace declarations): my $sparql = 'SELECT ?name ?homepage WHERE { [ foaf:name ?name ; foaf:homepage ?homepage ] }'; my $query = RDF::Query->new( $sparql ); my $iterator = $query->execute( $model ); while (my $row = $iterator->()) { my ($name, $homepage) = @$row; # ... } New code using RDF::Query 2.000 and later should instead use: my $sparql = 'SELECT ?name ?homepage WHERE { [ foaf:name ?name ; foaf:homepage ?homepage ] }'; my $query = RDF::Query->new( $sparql ); my $iterator = $query->execute( $model ); while (my $row = $iterator->next) { my $name = $row->{ name }; my $homepage = $row->{ homepage }; # ... } (Also notice the new method calling syntax for retrieving rows.) =back =cut package RDF::Query; use strict; use warnings; no warnings 'redefine'; use Carp qw(carp croak confess); use Data::Dumper; use LWP::UserAgent; use I18N::LangTags; use List::Util qw(first); use Scalar::Util qw(blessed reftype looks_like_number); use DateTime::Format::W3CDTF; use Log::Log4perl qw(:easy); Log::Log4perl->easy_init($ERROR); no warnings 'numeric'; use RDF::Trine 0.111; use RDF::Trine::Iterator qw(sgrep smap swatch); require RDF::Query::Functions; # (needs to happen at runtime because some of the functions rely on RDF::Query being fully loaded (to call add_hook(), for example)) # all the built-in functions including: # datatype casting, language ops, logical ops, # numeric ops, datetime ops, and node type testing # also, custom functions including: # jena:sha1sum, jena:now, jena:langeq, jena:listMember # ldodds:Distance, kasei:warn use RDF::Query::Expression; use RDF::Query::Algebra; use RDF::Query::Node; use RDF::Query::Parser::RDQL; use RDF::Query::Parser::SPARQL; use RDF::Query::Parser::SPARQLP; # local extensions to SPARQL use RDF::Query::Compiler::SQL; use RDF::Query::Error qw(:try); use RDF::Query::Logger; use RDF::Query::Plan; use RDF::Query::CostModel::Naive; use RDF::Query::CostModel::Counted; ###################################################################### our ($VERSION, $DEFAULT_PARSER); BEGIN { $VERSION = '2.200'; $DEFAULT_PARSER = 'sparql'; } ###################################################################### =head1 METHODS =over 4 =item C<< new ( $query, \%options ) >> =item C<< new ( $query, $baseuri, $languri, $lang, %options ) >> Returns a new RDF::Query object for the specified C<$query>. The query language defaults to SPARQL, but may be set specifically by specifying either C<$languri> or C<$lang>, whose acceptable values are: $lang: 'rdql', 'sparql', 'tsparql', or 'sparqlp' $languri: 'http://www.w3.org/TR/rdf-sparql-query/', or 'http://jena.hpl.hp.com/2003/07/query/RDQL' =cut sub new { my $class = shift; my $query = shift; my ($baseuri, $languri, $lang, %options); if (@_ and ref($_[0])) { %options = %{ shift() }; $lang = $options{ lang }; $baseuri = $options{ base }; } else { ($baseuri, $languri, $lang, %options) = @_; } $class->clear_error; my $l = Log::Log4perl->get_logger("rdf.query"); my $f = DateTime::Format::W3CDTF->new; no warnings 'uninitialized'; my %names = ( rdql => 'RDF::Query::Parser::RDQL', sparql => 'RDF::Query::Parser::SPARQL', tsparql => 'RDF::Query::Parser::SPARQLP', sparqlp => 'RDF::Query::Parser::SPARQLP', ); my %uris = ( 'http://jena.hpl.hp.com/2003/07/query/RDQL' => 'RDF::Query::Parser::RDQL', 'http://www.w3.org/TR/rdf-sparql-query/' => 'RDF::Query::Parser::SPARQL', ); if ($baseuri) { $baseuri = RDF::Query::Node::Resource->new( $baseuri ); } my $pclass = $names{ $lang } || $uris{ $languri } || $names{ $DEFAULT_PARSER }; my $parser = $pclass->new(); my $parsed = $parser->parse( $query, $baseuri ); my $ua = LWP::UserAgent->new( agent => "RDF::Query/${VERSION}" ); $ua->default_headers->push_header( 'Accept' => "application/sparql-results+xml;q=0.9,application/rdf+xml;q=0.5,text/turtle;q=0.7,text/xml" ); my $self = bless( { base => $baseuri, dateparser => $f, parser => $parser, parsed => $parsed, useragent => $ua, }, $class ); unless ($parsed->{'triples'}) { $class->set_error( $parser->error ); $l->debug($parser->error); return; } if ($options{net_filters}) { require JavaScript; $self->{options}{net_filters}++; } if ($options{trusted_keys}) { require Crypt::GPG; $self->{options}{trusted_keys} = $options{trusted_keys}; } if ($options{gpg}) { $self->{_gpg_obj} = delete $options{gpg}; } if (defined $options{keyring}) { $self->{options}{keyring} = $options{keyring}; } if (defined $options{secretkey}) { $self->{options}{secretkey} = $options{secretkey}; } if (defined $options{defines}) { @{ $self->{options} }{ keys %{ $options{defines} } } = values %{ $options{defines} }; } if ($options{logger}) { $l->debug("got external logger"); $self->{logger} = $options{logger}; } if ($options{costmodel}) { $l->debug("got cost model"); $self->{costmodel} = $options{costmodel}; } else { $self->{costmodel} = RDF::Query::CostModel::Naive->new(); } if (my $opt = $options{optimize}) { $l->debug("got optimization flag: $opt"); $self->{optimize} = $opt; } else { $self->{optimize} = 0; } if (my $opt = $options{force_no_optimization}) { $l->debug("got force_no_optimization flag"); $self->{force_no_optimization} = 1; } if (my $time = $options{optimistic_threshold_time}) { $l->debug("got optimistic_threshold_time flag"); $self->{optimistic_threshold_time} = $time; } # add rdf as a default namespace to RDQL queries if ($pclass eq 'RDF::Query::Parser::RDQL') { $self->{parsed}{namespaces}{rdf} = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; } return $self; } =item C Executes the query using the specified model, and returns the first matching row as a LIST of values. =cut sub get { my $self = shift; my $stream = $self->execute( @_ ); my $row = $stream->next; if (ref($row)) { return @{ $row }{ $self->variables }; } else { return undef; } } =item C<< prepare ( $model ) >> Prepares the query, constructing a query execution plan, and returns a list containing ($plan, $context). To execute the plan, call C<< execute_plan( $plan, $context ) >>. =cut sub prepare { my $self = shift; my $model = shift; my %args = @_; my $l = Log::Log4perl->get_logger("rdf.query"); $self->{_query_cache} = {}; # a new scratch hash for each execution. my %bound = ($args{ 'bind' }) ? %{ $args{ 'bind' } } : (); my $errors = ($args{ 'strict_errors' }) ? 1 : 0; my $parsed = $self->{parsed}; my @vars = $self->variables( $parsed ); my $bridge = $self->{bridge} || $self->get_bridge( $model, %args ); if ($bridge) { $self->bridge( $bridge ); $l->debug("got bridge $bridge"); } else { throw RDF::Query::Error::ModelError ( -text => "Could not create a model object." ); } $l->trace("loading data"); $self->load_data(); $bridge = $self->bridge(); # reload the bridge object, because load_data might have changed it. $l->trace("constructing ExecutionContext"); my $context = RDF::Query::ExecutionContext->new( bound => \%bound, model => $bridge, query => $self, base => $parsed->{base}, ns => $parsed->{namespaces}, logger => $self->logger, costmodel => $self->costmodel, optimize => $self->{optimize}, force_no_optimization => $self->{force_no_optimization}, optimistic_threshold_time => $self->{optimistic_threshold_time} || 0, requested_variables => \@vars, model_optimize => 1, strict_errors => $errors, ); $self->{model} = $model; $l->trace("getting QEP..."); my $plan = $self->query_plan( $context ); $l->trace("-> done."); unless ($plan) { throw RDF::Query::Error::CompilationError -text => "Query didn't produce a valid execution plan"; } return ($plan, $context); } =item C Executes the query using the specified model. If called in a list context, returns an array of rows, otherwise returns an iterator. =cut sub execute { my $self = shift; my $model = shift; my %args = @_; my $l = Log::Log4perl->get_logger("rdf.query"); $l->debug("executing query with model " . ($model or '')); my ($plan, $context) = $self->prepare( $model, %args ); if ($l->is_trace) { $l->trace(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>"); $l->trace($self->as_sparql); $l->trace(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>"); } return $self->execute_plan( $plan, $context ); } =item C<< execute_plan ( $plan, $context ) >> Executes the query using the supplied ExecutionContext. If called in a list context, returns an array of rows, otherwise returns an iterator. =cut sub execute_plan { my $self = shift; my $plan = shift; my $context = shift; my $bridge = $context->model; my $parsed = $self->{parsed}; my @vars = $self->variables( $parsed ); my $l = Log::Log4perl->get_logger("rdf.query"); my $pattern = $self->pattern; # $l->trace("calling fixup()"); # my $cpattern = $self->fixup(); my @funcs = $pattern->referenced_functions; foreach my $f (@funcs) { $self->run_hook( 'http://kasei.us/code/rdf-query/hooks/function_init', $f ); } # RUN THE QUERY! $l->debug("executing the graph pattern"); my $options = $parsed->{options} || {}; if ($self->{options}{plan}) { warn $plan->sse({}, ''); } $plan->execute( $context ); my $stream = $plan->as_iterator( $context ); # my $stream = RDF::Trine::Iterator::Bindings->new( sub { $plan->next }, \@vars, distinct => $plan->distinct, sorted_by => $plan->ordered ); $l->debug("performing projection"); my $expr = 0; foreach my $v (@{ $parsed->{'variables'} }) { $expr = 1 if ($v->isa('RDF::Query::Expression::Alias')); } if ($parsed->{'method'} eq 'DESCRIBE') { $stream = $self->describe( $stream ); } elsif ($parsed->{'method'} eq 'ASK') { $stream = $self->ask( $stream ); } $l->debug("going to call post-execute hook"); $self->run_hook( 'http://kasei.us/code/rdf-query/hooks/post-execute', $bridge, $stream ); if (wantarray) { return $stream->get_all(); } else { return $stream; } } =item C<< execute_with_named_graphs ( $model, @uris ) >> Executes the query using the specified model, loading the contents of the specified C<@uris> into named graphs immediately prior to matching the query. Otherwise, acts just like C<< execute >>. =cut sub execute_with_named_graphs { my $self = shift; my $model = shift; my $l = Log::Log4perl->get_logger("rdf.query"); $self->{model} = $model; my $bridge = $self->get_bridge( $model ); if ($bridge) { $self->bridge( $bridge ); } else { throw RDF::Query::Error::ModelError ( -text => "Could not create a model object." ); } foreach my $gdata (@_) { $l->debug("-> adding graph data " . $gdata->uri_value); $self->parse_url( $gdata->uri_value, 1 ); } return $self->execute( $model ); } =begin private =item C<< query_plan ( $execution_context ) >> Returns a RDF::Query::Plan object that is (hopefully) the optimal QEP for the current query. =end private =cut sub query_plan { my $self = shift; my $context = shift; my $parsed = $self->{parsed}; my %constant_plan; if (my $b = $self->{parsed}{bindings}) { my $vars = $b->{vars}; my $values = $b->{terms}; my @names = map { $_->name } @{ $vars }; my @constants; while (my $values = shift(@{ $b->{terms} })) { my %bound; @bound{ @names } = @{ $values }; my $bound = RDF::Query::VariableBindings->new( \%bound ); push(@constants, $bound); } my $constant_plan = RDF::Query::Plan::Constant->new( @constants ); %constant_plan = ( constants => [ $constant_plan ] ); } my $algebra = $self->pattern; my $pclass = $self->plan_class; my @plans = $pclass->generate_plans( $algebra, $context, %constant_plan ); my $l = Log::Log4perl->get_logger("rdf.query.plan"); if (wantarray) { return @plans; } else { my ($plan) = $self->prune_plans( $context, @plans ); if ($l->is_debug) { $l->debug("using query plan: " . $plan->sse({}, '')); } return $plan; } } =begin private =item C<< plan_class >> Returns the class name for Plan generation. This method should be overloaded by RDF::Query subclasses if the implementation also provides a subclass of RDF::Query::Plan. =end private =cut sub plan_class { return 'RDF::Query::Plan'; } =begin private =item C<< prune_plans ( $context, @plans ) >> =end private =cut sub prune_plans { my $self = shift; my $context = shift; my @plans = @_; return $self->plan_class->prune_plans( $context, @plans ); } =begin private =item C Takes a stream of matching statements and constructs a DESCRIBE graph. =end private =cut sub describe { my $self = shift; my $stream = shift; my $bridge = $self->bridge; my @nodes; my %seen; while (my $row = $stream->next) { foreach my $v (@{ $self->{parsed}{variables} }) { if ($v->isa('RDF::Query::Node::Variable')) { my $node = $row->{ $v->name }; push(@nodes, $node) unless ($seen{ $bridge->as_string( $node ) }++); } elsif ($v->isa('RDF::Query::Node::Resource')) { push(@nodes, $v) unless ($seen{ $bridge->as_string( $v ) }++); } } } my @streams; $self->{'describe_nodes'} = []; foreach my $node (@nodes) { push(@{ $self->{'describe_nodes'} }, $node); push(@streams, $bridge->get_statements( $node, undef, undef, $self, {} )); push(@streams, $bridge->get_statements( undef, undef, $node, $self, {} )); } my $ret = sub { while (@streams) { my $val = $streams[0]->next; if (defined $val) { return $val; } else { shift(@streams); return undef if (not @streams); } } }; return RDF::Trine::Iterator::Graph->new( $ret, bridge => $bridge ); } =begin private =item C Takes a stream of matching statements and returns a boolean query result stream. =end private =cut sub ask { my $self = shift; my $stream = shift; my $value = $stream->next; my $bool = ($value) ? 1 : 0; return RDF::Trine::Iterator::Boolean->new( [ $bool ], bridge => $self->bridge ); } ###################################################################### =item C<< aggregate ( \@groupby, $alias => [ $op, $col ] ) >> =cut sub aggregate { my $self = shift; my $groupby = shift; my %aggs = @_; my $pattern = $self->pattern; my $p = $pattern; if ($p->isa('RDF::Query::Algebra::Project')) { $pattern = $p = $p->pattern; } if ($p->is_solution_modifier) { while ($p->pattern->is_solution_modifier) { if ($p->pattern->isa('RDF::Query::Algebra::Project')) { $p->pattern( $p->pattern->pattern ); } $p = $p->pattern; } } my $head = ($p->is_solution_modifier) ? 1 : 0; my $child = ($head) ? $p->pattern : $p; my $agg = RDF::Query::Algebra::Aggregate->new( $child, $groupby, %aggs ); my $top; if ($head) { $p->pattern( $agg ); $top = $pattern; } else { $top = $agg; } $self->{parsed}{triples} = [ $top ]; $self->{parsed}{'variables'} = [ map { ref($_) ? $_ : RDF::Query::Node::Variable->new( $_ ) } (@$groupby, keys %aggs) ]; } =item C<< pattern >> Returns the RDF::Query::Algebra::GroupGraphPattern algebra pattern for this query. =cut sub pattern { my $self = shift; my $parsed = $self->parsed; my @triples = @{ $parsed->{triples} }; if (scalar(@triples) == 1 and ($triples[0]->isa('RDF::Query::Algebra::GroupGraphPattern') or $triples[0]->isa('RDF::Query::Algebra::Filter') or $triples[0]->isa('RDF::Query::Algebra::Sort') or $triples[0]->isa('RDF::Query::Algebra::Limit') or $triples[0]->isa('RDF::Query::Algebra::Offset') or $triples[0]->isa('RDF::Query::Algebra::Distinct') or $triples[0]->isa('RDF::Query::Algebra::Project') or $triples[0]->isa('RDF::Query::Algebra::Construct') )) { my $ggp = $triples[0]; return $ggp; } else { return RDF::Query::Algebra::GroupGraphPattern->new( @triples ); } } =item C<< as_sparql >> Returns the query as a string in the SPARQL syntax. =cut sub as_sparql { my $self = shift; my $parsed = $self->parsed; my $context = { namespaces => $self->{parsed}{namespaces} }; my $method = $parsed->{method}; my @vars = map { $_->as_sparql( $context, '' ) } @{ $parsed->{ variables } }; my $vars = join(' ', @vars); my $ggp = $self->pattern; { my $pvars = join(' ', sort $ggp->referenced_variables); my $svars = join(' ', sort map { $_->name } @{ $parsed->{ variables } }); if ($pvars eq $svars) { $vars = '*'; } } my @ns = map { "PREFIX $_: <$parsed->{namespaces}{$_}>" } (sort keys %{ $parsed->{namespaces} }); my @mod; if (my $ob = $parsed->{options}{orderby}) { push(@mod, 'ORDER BY ' . join(' ', map { my ($dir,$v) = @$_; ($dir eq 'ASC') ? $v->as_sparql( $context, '' ) : "${dir}" . $v->as_sparql( $context, '' ); } @$ob)); } if (my $l = $parsed->{options}{limit}) { push(@mod, "LIMIT $l"); } if (my $o = $parsed->{options}{offset}) { push(@mod, "OFFSET $o"); } my $mod = join("\n", @mod); my $methoddata = ''; if ($method eq 'SELECT') { $methoddata = $method; } elsif ($method eq 'ASK') { $methoddata = $method; } elsif ($method eq 'DESCRIBE') { $methoddata = sprintf("%s %s\nWHERE", $method, $vars); } my $sparql = sprintf( "%s\n%s %s\n%s", join("\n", @ns), $methoddata, $ggp->as_sparql( $context, '' ), $mod, ); chomp($sparql); return $sparql; } =item C<< sse >> Returns the query as a string in the SSE syntax. =cut sub sse { my $self = shift; my $parsed = $self->parsed; my $ggp = $self->pattern; my $ns = $parsed->{namespaces}; my $nscount = scalar(@{ [ keys %$ns ] }); my $base = $parsed->{base}; my $indent = ' '; my $context = { namespaces => $ns, indent => $indent }; my $indentcount = 0; $indentcount++ if ($base); $indentcount++ if ($nscount); my $prefix = $indent x $indentcount; my $sse = $ggp->sse( $context, $prefix ); if ($nscount) { $sse = sprintf("(prefix (%s)\n${prefix}%s)", join("\n${indent}" . ' 'x9, map { "(${_}: <$ns->{$_}>)" } (sort keys %$ns)), $sse); } if ($base) { $sse = sprintf("(base <%s>\n${indent}%s)", $base->uri_value, $sse); } chomp($sse); return $sse; } =begin private =item C Returns a boolean value representing the support of $feature for the given model. =end private =cut sub supports { my $self = shift; my $model = shift; my $bridge = $self->get_bridge( $model ); return $bridge->supports( @_ ); } =begin private =item C Returns the class name of a model backend that is present and loadable on the system. =end private =cut sub loadable_bridge_class { my $self = shift; my $l = Log::Log4perl->get_logger("rdf.query"); if (not $ENV{RDFQUERY_NO_RDFTRINE}) { eval "use RDF::Query::Model::RDFTrine;"; if (RDF::Query::Model::RDFTrine->can('new')) { return 'RDF::Query::Model::RDFTrine'; } else { $l->debug("RDF::Query::Model::RDFTrine didn't load cleanly"); } } else { $l->debug("RDF::Trine supressed"); } if (not $ENV{RDFQUERY_NO_REDLAND}) { eval "use RDF::Query::Model::Redland;"; if (RDF::Query::Model::Redland->can('new')) { return 'RDF::Query::Model::Redland'; } else { $l->debug("RDF::Query::Model::Redland didn't load cleanly"); } } else { $l->debug("RDF::Redland supressed"); } if (not $ENV{RDFQUERY_NO_RDFCORE}) { eval "use RDF::Query::Model::RDFCore;"; if (RDF::Query::Model::RDFCore->can('new')) { return 'RDF::Query::Model::RDFCore'; } else { $l->debug("RDF::Query::Model::RDFCore didn't load cleanly"); } } else { $l->debug("RDF::Core supressed"); } return undef; } =begin private =item C Returns a new bridge object representing a new, empty model. =end private =cut sub new_bridge { my $self = shift; my $bridge_class = $self->loadable_bridge_class; if ($bridge_class) { return $bridge_class->new(); } else { return undef; } } =begin private =item C Returns a bridge object for the specified model object. =end private =cut sub get_bridge { my $self = shift; my $model = shift; my %args = @_; my $parsed = ref($self) ? $self->{parsed} : undef; my $bridge; if (not $model) { $bridge = $self->new_bridge(); } elsif (($model->isa('RDF::Trine::Model'))) { require RDF::Query::Model::RDFTrine; $bridge = RDF::Query::Model::RDFTrine->new( $model, parsed => $parsed ); } elsif ($model->isa('RDF::Redland::Model')) { require RDF::Query::Model::Redland; $bridge = RDF::Query::Model::Redland->new( $model, parsed => $parsed ); } elsif ($model->isa('RDF::Core::Model')) { require RDF::Query::Model::RDFCore; $bridge = RDF::Query::Model::RDFCore->new( $model, parsed => $parsed ); } else { require Data::Dumper; Carp::confess "unknown model type: " . Data::Dumper::Dumper($model); } return $bridge; } =begin private =item C<< load_data >> Loads any external data required by this query (FROM and FROM NAMED clauses). =end private =cut sub load_data { my $self = shift; my $bridge = $self->bridge; my $parsed = $self->{parsed}; ## LOAD ANY EXTERNAL RDF FILES my $sources = $parsed->{'sources'}; if (ref($sources) and reftype($sources) eq 'ARRAY') { my $need_new_bridge = 1; my $named_query = 0; # put non-named sources first, because they will cause a new bridge to be # constructed. subsequent named data will then be loaded into the correct # bridge object. my @sources = sort { @$a == 2 } @$sources; foreach my $source (@sources) { my $named_source = (2 == @{$source} and $source->[1] eq 'NAMED'); if ((not $named_source) and $need_new_bridge) { # query uses FROM <..> clauses, so create a new bridge so we don't add the statements to a persistent default graph $bridge = $self->new_bridge(); $self->bridge( $bridge ); $need_new_bridge = 0; } my $uri = $source->[0]->uri_value; $self->parse_url( $uri, $named_source ); } $self->run_hook( 'http://kasei.us/code/rdf-query/hooks/post-create-model', $bridge ); } } =item C<< algebra_fixup ( $algebra, $bridge, $base, $ns ) >> Called in the fixup method of ::Algebra classes, returns either an optimized ::Algebra object ready for execution, or undef (in which case it will be prepared for execution by the ::Algebra::* class itself. =cut sub algebra_fixup { my $self = shift; my $pattern = shift; my $bridge = shift; my $base = shift; my $ns = shift; return if ($self->{force_no_optimization}); return $bridge->fixup( $pattern, $self, $base, $ns ); } =begin private =item C<< var_or_expr_value ( $bridge, \%bound, $value ) >> Returns an (non-variable) RDF::Query::Node value based on C<< $value >>. If C<< $value >> is a node object, it is simply returned. If it is an RDF::Query::Node::Variable object, the corresponding value in C<< \%bound >> is returned. If it is an RDF::Query::Expression object, the expression is evaluated using C<< \%bound >>, and the resulting value is returned. =end private =cut sub var_or_expr_value { my $self = shift; my $bridge = shift; my $bound = shift; my $v = shift; if ($v->isa('RDF::Query::Expression')) { return $v->evaluate( $self, $bridge, $bound ); } elsif ($v->isa('RDF::Trine::Node::Variable')) { return $bound->{ $v->name }; } elsif ($v->isa('RDF::Query::Node')) { return $v; } else { warn Dumper($v, $bound); throw RDF::Query::Error -text => 'Not an expression or node value'; } } =item C Associates the custom function C<$function> (a CODE reference) with the specified URI, allowing the function to be called by query FILTERs. =cut sub add_function { my $self = shift; my $uri = shift; my $code = shift; if (ref($self)) { $self->{'functions'}{$uri} = $code; } else { our %functions; $RDF::Query::functions{ $uri } = $code; } } =item C<< supported_extensions >> Returns a list of URLs representing extensions to SPARQL that are supported by the query engine. =cut sub supported_extensions { my $self = shift; return qw( http://kasei.us/2008/04/sparql-extension/service http://kasei.us/2008/04/sparql-extension/service/bloom_filters http://kasei.us/2008/04/sparql-extension/unsaid http://kasei.us/2008/04/sparql-extension/federate_bindings http://kasei.us/2008/04/sparql-extension/select_expression http://kasei.us/2008/04/sparql-extension/aggregate http://kasei.us/2008/04/sparql-extension/aggregate/count http://kasei.us/2008/04/sparql-extension/aggregate/count-distinct http://kasei.us/2008/04/sparql-extension/aggregate/min http://kasei.us/2008/04/sparql-extension/aggregate/max ); } =item C<< supported_functions >> Returns a list URLs that may be used as functions in FILTER clauses (and the SELECT clause if the SPARQLP parser is used). =cut sub supported_functions { my $self = shift; my @funcs; if (blessed($self)) { push(@funcs, keys %{ $self->{'functions'} }); } push(@funcs, keys %RDF::Query::functions); return grep { not(/^sparql:/) } @funcs; } =begin private =item C If C<$uri> is associated with a query function, returns a CODE reference to the function. Otherwise returns C. =end private =cut sub get_function { my $self = shift; my $uri = shift; my %args = @_; my $l = Log::Log4perl->get_logger("rdf.query"); $l->debug("trying to get function from $uri"); if (blessed($uri) and $uri->isa('RDF::Query::Node::Resource')) { $uri = $uri->uri_value; } my $func; if (ref($self)) { $func = $self->{'functions'}{$uri} || $RDF::Query::functions{ $uri }; } else { $func = $RDF::Query::functions{ $uri }; } if ($func) { return $func; } elsif (ref($self) and $self->{options}{net_filters}) { return $self->net_filter_function( $uri, %args ); } return; } =begin private =item C<< call_function ( $bridge, $bound, $uri, @args ) >> If C<$uri> is associated with a query function, calls the function with the supplied arguments. =end private =cut sub call_function { my $self = shift; my $bridge = shift; my $bound = shift; my $uri = shift; my $l = Log::Log4perl->get_logger("rdf.query"); $l->debug("trying to get function from $uri"); my $filter = RDF::Query::Expression::Function->new( $uri, @_ ); return $filter->evaluate( $self, $bridge, $bound ); } =item C<< add_computed_statement_generator ( \&generator ) >> Adds a statement generator to the query object. This statement generator will be called as C<< $generator->( $query, $bridge, \%bound, $s, $p, $o, $c ) >> and is expected to return an RDF::Trine::Iterator::Graph object. =cut sub add_computed_statement_generator { my $self = shift; my $gen = shift; push( @{ $self->{'computed_statement_generators'} }, $gen ); } =item C<< get_computed_statement_generators >> Returns an ARRAY reference of computed statement generator closures. =cut sub get_computed_statement_generators { my $self = shift; my $comps = $self->{'computed_statement_generators'} || []; return $comps; } =item C<< net_filter_function ( $uri ) >> Takes a URI specifying the location of a javascript implementation. Returns a code reference implementing the javascript function. If the 'trusted_keys' option is set, a GPG signature at ${uri}.asc is retrieved and verified against the arrayref of trusted key fingerprints. A code reference is returned only if a trusted signature is found. =cut sub net_filter_function { my $self = shift; my $uri = shift; my %args = @_; my $l = Log::Log4perl->get_logger("rdf.query"); $l->debug("fetching $uri"); my $bridge = $self->new_bridge(); $bridge->add_uri( $uri ); my $subj = $bridge->new_resource( $uri ); my $func = do { my $pred = $bridge->new_resource('http://www.mindswap.org/~gtw/sparql#function'); my $stream = $bridge->get_statements( $subj, $pred, undef, $self, {} ); my $st = $stream->(); my $obj = $bridge->object( $st ); my $func = $bridge->literal_value( $obj ); }; my $impl = do { my $pred = $bridge->new_resource('http://www.mindswap.org/~gtw/sparql#source'); my $stream = $bridge->get_statements( $subj, $pred, undef, $self, {} ); my $st = $stream->(); my $obj = $bridge->object( $st ); my $impl = $bridge->uri_value( $obj ); }; my $resp = $self->useragent->get( $impl ); unless ($resp->is_success) { warn "No content available from $uri: " . $resp->status_line; return; } my $content = $resp->content; if ($self->{options}{trusted_keys}) { my $gpg = $self->{_gpg_obj} || new Crypt::GPG; $gpg->gpgbin('/sw/bin/gpg'); $gpg->secretkey($self->{options}{secretkey} || $ENV{GPG_KEY} || '0xCAA8C82D'); my $keyring = exists($self->{options}{keyring}) ? $self->{options}{keyring} : File::Spec->catfile($ENV{HOME}, '.gnupg', 'pubring.gpg'); $gpg->gpgopts("--lock-multiple --keyring " . $keyring); my $sigresp = $self->useragent->get( "${impl}.asc" ); # if (not $sigresp) { # throw RDF::Query::Error::ExecutionError -text => "Required signature not found: ${impl}.asc\n"; if ($sigresp->is_success) { my $sig = $sigresp->content; my $ok = $self->_is_trusted( $gpg, $content, $sig, $self->{options}{trusted_keys} ); unless ($ok) { throw RDF::Query::Error::ExecutionError -text => "Not a trusted signature"; } } else { throw RDF::Query::Error::ExecutionError -text => "Could not retrieve required signature: ${uri}.asc"; return; } } my ($rt, $cx) = $self->new_javascript_engine(%args); my $r = $cx->eval( $content ); # die "Requested function URL does not match the function's URI" unless ($meta->{uri} eq $url); return sub { my $query = shift; my $bridge = shift; $l->debug("Calling javascript function $func with: " . Dumper(\@_)); my $value = $cx->call( $func, @_ ); $l->debug("--> $value"); return $value; }; } sub _is_trusted { my $self = shift; my $gpg = shift; my $file = shift; my $sigfile = shift; my $trusted = shift; my (undef, $sig) = $gpg->verify($sigfile, $file); return 0 unless ($sig->validity eq 'GOOD'); my $id = $sig->keyid; my @keys = $gpg->keydb($id); foreach my $key (@keys) { my $fp = $key->{Fingerprint}; $fp =~ s/ //g; return 1 if (first { s/ //g; $_ eq $fp } @$trusted); } return 0; } =begin private =item C Returns a new JavaScript Runtime and Context object for running network FILTER functions. =end private =cut sub new_javascript_engine { my $self = shift; my %args = @_; my $bridge = $args{bridge}; my $l = Log::Log4perl->get_logger("rdf.query"); my $rt = JavaScript::Runtime->new(); my $cx = $rt->create_context(); my $meta = $bridge->meta; $cx->bind_function( 'warn' => sub { $l->debug(@_) } ); $cx->bind_function( '_warn' => sub { $l->debug(@_) } ); $cx->bind_function( 'makeTerm' => sub { my $term = shift; my $lang = shift; my $dt = shift; # warn 'makeTerm: ' . Dumper($term); if (not blessed($term)) { my $node = $bridge->new_literal( $term, $lang, $dt ); return $node; } else { return $term; } } ); my $toString = sub { my $string = $bridge->literal_value( @_ ) . ''; return $string; }; $cx->bind_class( name => 'RDFNode', constructor => sub {}, 'package' => $meta->{node}, 'methods' => { is_literal => sub { return $bridge->is_literal( $_[0] ) }, is_resource => sub { return $bridge->is_resource( $_[0] ) }, is_blank => sub { return $bridge->is_blank( $_[0] ) }, toString => $toString, }, ps => { literal_value => [sub { return $bridge->literal_value($_[0]) }], literal_datatype => [sub { return $bridge->literal_datatype($_[0]) }], literal_value_language => [sub { return $bridge->literal_value_language($_[0]) }], uri_value => [sub { return $bridge->uri_value($_[0]) }], blank_identifier => [sub { return $bridge->blank_identifier($_[0]) }], }, ); if ($meta->{literal} ne $meta->{node}) { $cx->bind_class( name => 'RDFLiteral', constructor => sub {}, 'package' => $bridge->meta->{literal}, 'methods' => { is_literal => sub { return 1 }, is_resource => sub { return 0 }, is_blank => sub { return 0 }, toString => $toString, }, ps => { literal_value => [sub { return $bridge->literal_value($_[0]) }], literal_datatype => [sub { return $bridge->literal_datatype($_[0]) }], literal_value_language => [sub { return $bridge->literal_value_language($_[0]) }], }, ); # $cx->eval( 'RDFLiteral.prototype.__proto__ = RDFNode.prototype;' ); } if ($meta->{resource} ne $meta->{node}) { $cx->bind_class( name => 'RDFResource', constructor => sub {}, 'package' => $bridge->meta->{resource}, 'methods' => { is_literal => sub { return 0 }, is_resource => sub { return 1 }, is_blank => sub { return 0 }, toString => $toString, }, ps => { uri_value => [sub { return $bridge->uri_value($_[0]) }], }, ); # $cx->eval( 'RDFResource.prototype.__proto__ = RDFNode.prototype;' ); } if ($meta->{blank} ne $meta->{node}) { $cx->bind_class( name => 'RDFBlank', constructor => sub {}, 'package' => $bridge->meta->{blank}, 'methods' => { is_literal => sub { return 0 }, is_resource => sub { return 0 }, is_blank => sub { return 1 }, toString => $toString, }, ps => { blank_identifier => [sub { return $bridge->blank_identifier($_[0]) }], }, ); # $cx->eval( 'RDFBlank.prototype.__proto__ = RDFNode.prototype;' ); } return ($rt, $cx); } =item C<< add_hook_once ( $hook_uri, $function, $token ) >> Calls C<< add_hook >> adding the supplied C<< $function >> only once based on the C<< $token >> identifier. This may be useful if the only code that is able to add a hook is called many times (in an extension function, for example). =cut sub add_hook_once { my $self = shift; my $uri = shift; my $code = shift; my $token = shift; unless ($self->{'hooks_once'}{ $token }++) { $self->add_hook( $uri, $code ); } } =item C<< add_hook ( $hook_uri, $function ) >> Associates the custom function C<$function> (a CODE reference) with the RDF::Query code hook specified by C<$uri>. Each function that has been associated with a particular hook will be called (in the order they were registered as hooks) when the hook event occurs. See L for more information. =cut sub add_hook { my $self = shift; my $uri = shift; my $code = shift; if (ref($self)) { push(@{ $self->{'hooks'}{$uri} }, $code); } else { our %hooks; push(@{ $RDF::Query::hooks{ $uri } }, $code); } } =begin private =item C If C<$uri> is associated with any query callback functions ("hooks"), returns an ARRAY reference to the functions. If no hooks are associated with C<$uri>, returns a reference to an empty array. =end private =cut sub get_hooks { my $self = shift; my $uri = shift; my $func = $self->{'hooks'}{ $uri } || $RDF::Query::hooks{ $uri } || []; return $func; } =begin private =item C Calls any query callback functions associated with C<$uri>. Each callback is called with the query object as the first argument, followed by any caller-supplied arguments from C<@args>. =end private =cut sub run_hook { my $self = shift; my $uri = shift; my @args = @_; my $hooks = $self->get_hooks( $uri ); foreach my $hook (@$hooks) { $hook->( $self, @args ); } } =begin private =item C Retrieve a remote file by URL, and parse RDF into the RDF store. If $named is TRUE, associate all parsed triples with a named graph. =end private =cut sub parse_url { my $self = shift; my $url = shift; my $named = shift; my $bridge = $self->bridge; $bridge->add_uri( $url, $named ); } =begin private =item C Returns a list of the ordered variables the query is selecting. =end private =cut sub variables { my $self = shift; my $parsed = shift || $self->parsed; my @vars = map { $_->name } grep { $_->isa('RDF::Query::Node::Variable') or $_->isa('RDF::Query::Expression::Alias') } @{ $parsed->{'variables'} }; return @vars; } =item C Returns the parse tree. =cut sub parsed { my $self = shift; if (@_) { $self->{parsed} = shift; } return $self->{parsed}; } =item C Returns the model bridge of the default graph. =cut sub bridge { my $self = shift; if (@_) { $self->{bridge} = shift; } my $bridge = $self->{bridge}; unless (defined $bridge) { $bridge = $self->get_bridge(); } return $bridge; } =item C<< useragent >> Returns the LWP::UserAgent object used for retrieving web content. =cut sub useragent { my $self = shift; return $self->{useragent}; } =item C<< log ( $key [, $value ] ) >> If no logger object is associated with this query object, does nothing. Otherwise, return or set the corresponding value depending on whether a C<< $value >> is specified. =cut sub log { my $self = shift; if (blessed(my $l = $self->{ logger })) { $l->log( @_ ); } } =item C<< logger >> Returns the logger object associated with this query object (if present). =cut sub logger { my $self = shift; return $self->{ logger }; } =item C<< costmodel >> Returns the RDF::Query::CostModel object associated with this query object (if present). =cut sub costmodel { my $self = shift; return $self->{ costmodel }; } =item C Returns the last error the parser experienced. =cut sub error { my $self = shift; if (blessed($self)) { return $self->{error}; } else { our $_ERROR; return $_ERROR; } } sub _uniq { my %seen; my @data; foreach (@_) { push(@data, $_) unless ($seen{ $_ }++); } return @data; } =begin private =item C Sets the object's error variable. =end private =cut sub set_error { my $self = shift; my $error = shift; if (blessed($self)) { $self->{error} = $error; } our $_ERROR = $error; } =begin private =item C Clears the object's error variable. =end private =cut sub clear_error { my $self = shift; if (blessed($self)) { $self->{error} = undef; } our $_ERROR; undef $_ERROR; } =begin private =item C<_debug_closure ( $code )> Debugging function to print out a deparsed (textual) version of a closure. =end private =cut sub _debug_closure { my $closure = shift; my $l = Log::Log4perl->get_logger("rdf.query"); if ($l->is_trace) { require B::Deparse; my $deparse = B::Deparse->new("-p", "-sC"); my $body = $deparse->coderef2text($closure); $l->trace("--- --- CLOSURE --- ---"); $l->logcluck($body); } } 1; __END__ =back =head1 REQUIRES =over 4 =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L =item * L or L for optional model support. =back =head1 DEFINED HOOKS The following hook URIs are defined and may be used to extend the query engine functionality using the C<< add_hook >> method: =over 4 =item http://kasei.us/code/rdf-query/hooks/post-create-model Called after loading all external files to a temporary model in queries that use FROM and FROM NAMED. Args: ( $query, $bridge ) C<$query> is the RDF::Query object. C<$bridge> is the model bridge (RDF::Query::Model::*) object. =item http://kasei.us/code/rdf-query/hooks/post-execute Called immediately before returning a result iterator from the execute method. Args: ( $query, $bridge, $iterator ) C<$query> is the RDF::Query object. C<$bridge> is the model bridge (RDF::Query::Model::*) object. C<$iterator> is a RDF::Trine::Iterator object. =back =head1 AUTHOR Gregory Todd Williams =head1 COPYRIGHT Copyright (c) 2005-2009 Gregory Todd Williams. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut