package RDF::Notation3; require 5.005_62; use strict; #use warnings; use vars qw($VERSION); use File::Spec::Functions (); use Carp; use RDF::Notation3::ReaderFile; use RDF::Notation3::ReaderString; $VERSION = '0.91'; ############################################################ sub new { my ($class) = @_; my $self = { ansuri => '#', quantif => 1, nIDpref => '_:a', # this fits to RDF:Core prefix for nodeID }; bless $self, $class; return $self; } sub parse_file { my ($self, $path) = @_; $self->_define; my $fh; if (ref $path eq 'IO::File') { $fh = $path; } else { open(FILE, "$path") or $self->_do_error(2, $path); $fh = *FILE; } my $t = new RDF::Notation3::ReaderFile($fh); $self->{reader} = $t; $self->_document; close (FILE); } sub parse_string { my ($self, $str) = @_; $self->_define; my $t = new RDF::Notation3::ReaderString($str); $self->{reader} = $t; $self->_document; } sub anonymous_ns_uri { my ($self, $uri) = @_; if (@_ > 1) { $self->{ansuri} = $uri; } else { return $self->{ansuri}; } } sub quantification { my ($self, $val) = @_; if (@_ > 1) { $self->_do_error(4, $val) unless $val == 1 || $val == 0; $self->{quantif} = $val; } else { return $self->{quantif}; } } sub _define { my ($self) = @_; $self->{ns} = {}; $self->{context} = '<>'; $self->{gid} = 1; $self->{cid} = 1; $self->{hardns} = { rdf => ['rdf','http://www.w3.org/1999/02/22-rdf-syntax-ns#'], daml => ['daml','http://www.daml.org/2001/03/daml+oil#'], log => ['log','http://www.w3.org/2000/10/swap/log.n3#'], }; $self->{keywords} = []; } sub _document { my ($self) = @_; my $next = $self->{reader}->try; #print ">doc starts: $next\n"; if ($next ne ' EOF ') { $self->_statement_list; } #print ">end\n"; } sub _statement_list { my ($self) = @_; my $next = $self->_eat_EOLs; #print ">statement list: $next\n"; while ($next ne ' EOF ') { if ($next =~ /^(?:|#.*)$/) { $self->_space; } elsif ($next =~ /^}/) { #print ">end of nested statement list: $next\n"; last; } else { $self->_statement; } $next = $self->_eat_EOLs; } #print ">end of statement list: $next\n"; } sub _space { my ($self) = @_; #print ">space: "; my $tk = $self->{reader}->get; # comment or empty string while ($tk ne ' EOL ') { #print ">$tk "; $tk = $self->{reader}->get; } #print ">\n"; } sub _statement { my ($self, $subject) = @_; my $next = $self->{reader}->try; #print ">statement starts: $next\n"; if ($next =~ /^\@prefix|\@keywords|bind$/) { $self->_directive; } else { $subject = $self->_node unless $subject; #print ">subject: $subject\n"; my $properties = []; $self->_property_list($properties); #print ">CONTEXT: $self->{context}\n"; #print ">SUBJECT: $subject\n"; #print ">PROPERTY: void\n" unless @$properties; #foreach (@$properties) { # comment/uncomment by hand #print ">PROPERTY: ", join('-', @$_), "\n"; #} $self->_process_statement($subject, $properties) if @$properties; } # next step $next = $self->_eat_EOLs; if ($next eq '.') { $self->{reader}->get; } elsif ($next =~ /^\.(.*)$/) { $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $1; } elsif ($next =~ /^(?:\]|\)|\})/) { } else { $self->_do_error(115,$next); } } sub _node { my ($self) = @_; my $next = $self->_eat_EOLs; #print ">node: $next\n"; if ($next =~ /^[\[\{\(]/) { #print ">node is anonnode\n"; return $self->_anonymous_node; } elsif ($next eq 'this') { #print ">this\n"; $self->{reader}->get; return "$self->{context}"; } elsif ($next =~ /^(<[^>]*>|^(?:[_a-zA-Z]\w*)?:[_a-zA-Z][_\w]*)(.*)$/) { #print ">node is uri_ref2: $next\n"; if ($2) { $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $2; unshift @{$self->{reader}->{tokens}}, $1; #print ">cleaned uri_ref2: $1\n"; } return $self->_uri_ref2; } elsif ($self->{keywords}[0] && ($next =~ /^(^[_a-zA-Z][_\w]*)(.*)$/)) { #print ">node is uri_ref_kw: $next\n"; $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $2 if $2; unshift @{$self->{reader}->{tokens}}, ':' . $1; #print ">cleaned uri_ref2: $1\n"; return $self->_uri_ref2; } else { #print ">unknown node: $next\n"; $self->_do_error(116,$next); } } sub _directive { my ($self) = @_; my $tk = $self->{reader}->get; #print ">directive: $tk\n"; if ($tk eq '@prefix') { my $tk = $self->{reader}->get; if ($tk =~ /^([_a-zA-Z]\w*)?:$/) { my $pref = $1; #print ">nprefix: $pref\n" if $pref; my $ns_uri = $self->_uri_ref2; $ns_uri =~ s/^<(.*)>$/$1/; if ($pref) { $self->{ns}->{$self->{context}}->{$pref} = $ns_uri; } else { $self->{ns}->{$self->{context}}->{''} = $ns_uri; } } else { $self->_do_error(102,$tk); } } elsif ($tk eq '@keywords') { my $kw = $self->{reader}->get; while ($kw =~ /,$/) { $kw =~ s/,$//; push @{$self->{keywords}}, $kw; $kw = $self->{reader}->get; } if ($kw =~ /^(.+)\.$/) { push @{$self->{keywords}}, $1; unshift @{$self->{reader}{tokens}}, '.'; } else { $self->_do_error(117,$tk); } #print ">keywords: ", join('|', @{$self->{keywords}}), "\n"; } else { $self->_do_error(101,$tk); } } sub _uri_ref2 { my ($self) = @_; # possible end of statement, a simple . check is done my $next = $self->{reader}->try; if ($next =~ /^(.+)\.$/) { $self->{reader}->{tokens}->[0] = '.'; unshift @{$self->{reader}->{tokens}}, $1; } my $tk = $self->{reader}->get; #print ">uri_ref2: $tk\n"; if ($tk =~ /^<[^>]*>$/) { #print ">URI\n"; return $tk; } elsif ($tk =~ /^([_a-zA-Z]\w*)?:[a-zA-Z]\w*$/) { #print ">qname ($1:)\n" if $1; my $pref = ''; $pref = $1 if $1; if ($pref eq '_') { # workaround to parse N-Triples $self->{ns}->{$self->{context}}->{_} = $self->{ansuri} unless $self->{ns}->{$self->{context}}->{_}; } # Identifier demunging $tk = _unesc_qname($tk) if $tk =~ /_/; return $tk; } else { $self->_do_error(103,$tk); } } sub _property_list { my ($self, $properties) = @_; my $next = $self->_eat_EOLs; #print ">property list: $next\n"; $next = $self->_check_inline_comment($next); if ($next =~ /^:-/) { #print ">anonnode\n"; # TBD $self->_do_error(199, $next); } elsif ($next =~ /^\./) { #print ">void prop_list\n"; # TBD } else { #print ">prop_list with verb\n"; my $property = $self->_verb; #print ">property is back: $property\n"; my $objects = []; $self->_object_list($objects); unshift @$objects, $property; unshift @$objects, 'i' if ($next eq 'is' or $next eq '<-'); #print ">inverse mode\n" if ($next eq 'is' or $next eq '<-'); push @$properties, $objects; } # next step $next = $self->_eat_EOLs; if ($next eq ';') { $self->{reader}->get; $self->_property_list($properties); } } sub _verb { my ($self) = @_; my $next = $self->{reader}->try; #print ">verb: $next\n"; if ($next eq 'has') { $self->{reader}->get; return $self->_node; } elsif ($next eq '>-') { $self->{reader}->get; my $node = $self->_node; my $tk = $self->{reader}->get; $self->_do_error(104,$tk) unless $tk eq '->'; return $node; } elsif ($next eq 'is') { $self->{reader}->get; my $node = $self->_node; my $tk = $self->{reader}->get; $self->_do_error(109,$tk) unless $tk eq 'of'; return $node; } elsif ($next eq '<-') { $self->{reader}->get; my $node = $self->_node; my $tk = $self->{reader}->get; $self->_do_error(110,$tk) unless $tk eq '-<'; return $node; } elsif ($next eq 'a') { $self->{reader}->get; return $self->_built_in_verb('rdf','type'); # return '' } elsif ($next =~ /^=(.*)/) { $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $1 if $1; return $self->_built_in_verb('daml','equivalentTo'); # return ''; } else { #print ">property: $next\n"; return $self->_node; } } sub _object_list { my ($self, $objects) = @_; my $next = $self->_eat_EOLs; #print ">object list: $next\n"; $next = $self->_check_inline_comment($next); # possible end of entity, check for sticked next char is done while ($next =~ /^([^"]+)([,;\.\}\]\)])$/) { $self->{reader}->{tokens}->[0] = $2; unshift @{$self->{reader}->{tokens}}, $1; $next = $1; } my $obj = $self->_object; #print ">object is back: $obj\n"; push @$objects, $obj; # next step $next = $self->_eat_EOLs; if ($next eq ',') { $self->{reader}->get; $self->_object_list($objects); } } sub _object { my ($self) = @_; my $next = $self->_eat_EOLs; #print ">object: $next:\n"; if ($next =~ /^("(?:\\"|[^\"])*")([\.;,\]\}\)])*$/) { #print ">complete string1: $next\n"; my $tk = $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $2 if $2; return $self->_unesc_string($1); } else { #print ">object is node: $next\n"; $self->_node; } } sub _anonymous_node { my ($self) = @_; my $next = $self->{reader}->try; $next =~ /^([\[\{\(])(.*)$/; #print ">anonnode1: $1\n"; #print ">anonnode2: $2\n"; $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $2 if $2; if ($1 eq '[') { #print ">anonnode: []\n"; my $genid = "<$self->{ansuri}g_$self->{gid}>"; $self->{gid}++; $next = $self->_eat_EOLs; if ($next =~ /^\](.)*$/) { $self->_exist_quantif($genid); } else { $self->_exist_quantif($genid); $self->_statement($genid); } # next step $next = $self->_eat_EOLs; my $tk = $self->{reader}->get; if ($tk =~ /^\](.+)$/) { unshift @{$self->{reader}->{tokens}}, $1; } elsif ($tk ne ']') { $self->_do_error(107, $tk); } return $genid; } elsif ($1 eq '{') { #print ">anonnode: {}\n"; my $genid = "<$self->{ansuri}c_$self->{cid}>"; $self->{cid}++; # ns mapping is passed to inner context $self->{ns}->{$genid} = {}; foreach (keys %{$self->{ns}->{$self->{context}}}) { $self->{ns}->{$genid}->{$_} = $self->{ns}->{$self->{context}}->{$_}; #print ">prefix '$_' passed to inner context\n"; } my $parent_context = $self->{context}; $self->{context} = $genid; $self->_exist_quantif($genid); # quantifying the new context $self->_statement_list; # parsing nested statements $self->{context} = $parent_context; # next step $self->_eat_EOLs; my $tk = $self->{reader}->get; #print ">next token: $tk\n"; if ($tk =~ /^\}([,;\.\]\}\)])?$/) { unshift @{$self->{reader}->{tokens}}, $1 if $1; } else { $self->_do_error(108, $tk); } return $genid; } else { #print ">anonnode: ()\n"; my $next = $self->_eat_EOLs; # if ($next =~ /^\)([,;\.\]\}\)])*$/) { if ($next =~ /^\)(.*)$/) { #print ">void ()\n"; $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $1 if $1; return $self->_built_in_verb('daml','nil'); } else { #print ">anonnode () starts: $next\n"; my @nodes = (); until ($next =~ /^.*\)[,;\.\]\}\)]*$/) { push @nodes, $self->_object; $next = $self->_eat_EOLs; } if ($next =~ /^([^)]*)\)([,;\.\]\}\)]*)$/) { $self->{reader}->get; unshift @{$self->{reader}->{tokens}}, $2 if $2; unshift @{$self->{reader}->{tokens}}, ')'; if ($1) { unshift @{$self->{reader}->{tokens}}, $1; push @nodes, $self->_object; } $self->{reader}->get; } my $pref = $self->_built_in_verb('daml',''); my $i = 0; my @expnl = (); # expanded node list foreach (@nodes) { $i++; push @expnl, '['; push @expnl, $pref . 'first'; push @expnl, $_; push @expnl, ';'; push @expnl, $pref . 'rest'; push @expnl, $pref . 'nil' if $i == scalar @nodes; } for (my $j = 0; $j < $i; $j++) {push @expnl, ']'} unshift @{$self->{reader}->{tokens}}, @expnl; my $exp = join(' ', @expnl); #print ">expanded: $exp\n"; my $genid = $self->_anonymous_node; return $genid; } } } ######################################## # utils sub _exist_quantif { my ($self, $anode) = @_; if ($self->{quantif}) { my $qname = $self->_built_in_verb('log','forSome'); #print ">existential quantification: $anode\n"; #print ">CONTEXT: $self->{context}\n"; #print ">SUBJECT: $self->{context}\n"; #print ">PROPERTY: $qname"; #print ">-$anode\n"; $self->_process_statement($self->{context}, [[$qname, $anode]]); } } sub _eat_EOLs { my ($self) = @_; my $next = $self->{reader}->try; while ($next eq ' EOL ') { $self->{reader}->get; $next = $self->{reader}->try; } return $next; } # comment inside a list sub _check_inline_comment { my ($self, $next) = @_; if ($next =~ /^#/) { $self->_space; $next = $self->_eat_EOLs; } return $next; } sub _built_in_verb { my ($self, $key, $verb) = @_; # resolves possible NS conflicts my $i = 1; while ($self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} and $self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} ne $self->{hardns}->{$key}->[1]) { $self->{hardns}->{$key}->[0] = "$key$i"; $i++; } # adds prefix-NS binding $self->{ns}->{$self->{context}}->{$self->{hardns}->{$key}->[0]} = $self->{hardns}->{$key}->[1]; return "$self->{hardns}->{$key}->[0]:$verb"; } sub _unesc_qname { my $qname = shift; #print ">escaped qname: $qname\n"; my $i = 0; my @unesc = (); while ($qname =~ /(__+)/) { my $res = substr(sprintf("%b", length($1) + 1), 1); $res =~ s/1/-/g; $res =~ s/0/_/g; $qname =~ s/__+/<$i>/; push @unesc, $res; $i++; } for ($i=0; $i<@unesc; $i++) { $qname =~ s/<$i>/$unesc[$i]/; } #print ">unescaped qname: $qname\n"; return $qname; } sub _unesc_string { my ($self, $str) = @_; $str =~ s/\\\n//go; $str =~ s/\\\\/\\/go; $str =~ s/\\'/'/go; $str =~ s/\\"/"/go; $str =~ s/\\n/\n/go; $str =~ s/\\r/\r/go; $str =~ s/\\t/\t/go; $str =~ s/\\u([\da-fA-F]{4})/pack('U',hex($1))/ge; $str =~ s/\\U00([\da-fA-F]{6})/pack('U',hex($1))/ge; $str =~ s/\\([\da-fA-F]{3})/pack('C',oct($1))/ge; #deprecated $str =~ s/\\x([\da-fA-F]{2})/pack('C',hex($1))/ge; #deprecated return $str; } ######################################## sub _do_error { my ($self, $n, $tk) = @_; my %msg = ( 1 => 'file not specified', 2 => 'file not found', 3 => 'string not specified', 4 => 'invalid parameter of quantification method (0|1)', 101 => 'bind directive is obsolete, use @prefix instead', 102 => 'invalid namespace prefix', 103 => 'invalid URI reference (uri_ref2)', 104 => 'end of verb (->) expected', 105 => 'invalid characters in string1', 106 => 'namespace prefix not bound', 107 => 'invalid end of anonnode, ] expected', 108 => 'invalid end of anonnode, } expected', 109 => 'end of verb (of) expected', 110 => 'end of verb (-<) expected', 111 => 'string1 ("...") is not terminated', 112 => 'invalid characters in string2', 113 => 'string2 ("""...""")is not terminated', 114 => 'string1 ("...") can\'t include newlines', 115 => 'end of statement expected', 116 => 'invalid node', 117 => 'last keyword expected', 199 => ':- token not supported yet', 201 => '[Triples] attempt to add invalid node', 202 => '[Triples] literal not allowed as subject or predicate', #301 => '[SAX] systemID source not implemented', 302 => '[SAX] characterStream source not implemented', 401 => '[XML] unable to convert URI predicate to QName', 402 => '[XML] subject not recognized - internal error', 501 => '[RDFCore] literal not allowed as subject', 502 => '[RDFCore] valid storage not specified', 503 => '[RDFStore] literal not allowed as subject', ); my $msg = "[Error $n]"; $msg .= " line $self->{reader}->{ln}, token" if $n > 100; $msg .= " \"$tk\"\n"; $msg .= "$msg{$n}!\n"; croak $msg; } 1;