package HTTP::OAI::Repository; use strict; use warnings; use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); require Exporter; @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = qw( &validate_request &validate_request_1_1 &validate_date &validate_metadataPrefix &validate_responseDate &validate_setSpec ); %EXPORT_TAGS = (validate=>[qw(&validate_request &validate_date &validate_metadataPrefix &validate_responseDate &validate_setSpec)]); use HTTP::OAI::Error qw(%OAI_ERRORS); # Copied from Simeon Warner's tutorial at # http://library.cern.ch/HEPLW/4/papers/3/OAIServer.pm # (note: corrected grammer for ListSets) # 0 = optional, 1 = required, 2 = exclusive my %grammer = ( 'GetRecord' => { 'identifier' => [1, \&validate_identifier], 'metadataPrefix' => [1, \&validate_metadataPrefix] }, 'Identify' => {}, 'ListIdentifiers' => { 'from' => [0, \&validate_date], 'until' => [0, \&validate_date], 'set' => [0, \&validate_setSpec_2_0], 'metadataPrefix' => [1, \&validate_metadataPrefix], 'resumptionToken' => [2, sub { 0 }] }, 'ListMetadataFormats' => { 'identifier' => [0, \&validate_identifier] }, 'ListRecords' => { 'from' => [0, \&validate_date], 'until' => [0, \&validate_date], 'set' => [0, \&validate_setSpec_2_0], 'metadataPrefix' => [1, \&validate_metadataPrefix], 'resumptionToken' => [2, sub { 0 }] }, 'ListSets' => { 'resumptionToken' => [2, sub { 0 }] } ); sub new { my ($class,%args) = @_; my $self = bless {}, $class; $self; } sub validate_request { validate_request_2_0(@_); } sub validate_request_2_0 { my %params = @_; my $verb = $params{'verb'}; delete $params{'verb'}; my @errors; return (new HTTP::OAI::Error(code=>'badVerb',message=>'No verb supplied')) unless defined $verb; my $grm = $grammer{$verb} or return (new HTTP::OAI::Error(code=>'badVerb',message=>"Unknown verb '$verb'")); if( defined $params{'from'} && defined $params{'until'} ) { if( granularity($params{'from'}) ne granularity($params{'until'}) ) { return (new HTTP::OAI::Error( code=>'badArgument', message=>'Granularity used in from and until must be the same' )); } } # Check exclusivity foreach my $arg (keys %$grm) { my ($type, $valid_func) = @{$grm->{$arg}}; next unless ($type == 2 && defined($params{$arg})); if( my $err = &$valid_func($params{$arg}) ) { return (new HTTP::OAI::Error( code=>'badArgument', message=>("Bad argument ($arg): " . $err) )); } delete $params{$arg}; if( %params ) { for(keys %params) { push @errors, new HTTP::OAI::Error( code=>'badArgument', message=>"'$_' can not be used in conjunction with $arg" ); } return @errors; } else { return (); } } # Check required/optional foreach my $arg (keys %$grm) { my ($type, $valid_func) = @{$grm->{$arg}}; if( $params{$arg} ) { if( my $err = &$valid_func($params{$arg}) ) { return (new HTTP::OAI::Error(code=>'badArgument',message=>"Bad argument ($arg): " . $err)) } } if( $type == 1 && (!defined($params{$arg}) || $params{$arg} eq '') ) { return (new HTTP::OAI::Error(code=>'badArgument',message=>"Required argument '$arg' was undefined")); } delete $params{$arg}; } if( %params ) { for(keys %params) { push @errors, new HTTP::OAI::Error( code=>'badArgument', message=>"'$_' is not a recognised argument for $verb" ); } return @errors; } else { return (); } } sub granularity { my $date = shift; return 'year' if $date =~ /^\d{4}-\d{2}-\d{2}$/; return 'seconds' if $date =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/; } sub validate_date { my $date = shift; return "Date not in OAI format (yyyy-mm-dd or yyyy-mm-ddThh:mm:ssZ)" unless $date =~ /^(\d{4})-(\d{2})-(\d{2})(T\d{2}:\d{2}:\d{2}Z)?$/; my( $y, $m, $d ) = ($1,($2||1),($3||1)); return "Month in date is not in range 1-12" if ($m < 1 || $m > 12); return "Day in date is not in range 1-31" if ($d < 1 || $d > 31); 0; } sub validate_responseDate { return shift =~ /^(\d{4})\-([01][0-9])\-([0-3][0-9])T([0-2][0-9]):([0-5][0-9]):([0-5][0-9])[\+\-]([0-2][0-9]):([0-5][0-9])$/ ? 0 : "responseDate not in OAI format (yyyy-mm-ddThh:mm:dd:ss[+-]hh:mm)"; } sub validate_setSpec { return shift =~ /^([A-Za-z0-9])+(:[A-Za-z0-9]+)*$/ ? 0 : "Set spec not in OAI format, must match ^([A-Za-z0-9])+(:[A-Za-z0-9]+)*\$"; } sub validate_setSpec_2_0 { return shift =~ /^([A-Za-z0-9_!'\$\(\)\+\-\.\*])+(:[A-Za-z0-9_!'\$\(\)\+\-\.\*]+)*$/ ? 0 : "Set spec not in OAI format, must match ([A-Za-z0-9_!'\\\$\(\\)\\+\\-\\.\\*])+(:[A-Za-z0-9_!'\\$\\(\\)\\+\\-\\.\\*]+)*"; } sub validate_metadataPrefix { return shift =~ /^[\w]+$/ ? 0 : "Metadata prefix not in OAI format, must match regexp ^[\\w]+\$"; } # OAI 2 requires identifiers by valid URIs # This doesn't check for invalid chars, merely : sub validate_identifier { return shift =~ /^[[:alpha:]][[:alnum:]\+\-\.]*:.+/ ? 0 : "Identifier not in OAI format, must match regexp ^[[:alpha:]][[:alnum:]\\+\\-\\.]*:.+"; } 1; __END__ =head1 NAME HTTP::OAI::Repository - Documentation for building an OAI compliant repository using OAI-PERL =head1 DESCRIPTION Using the OAI-PERL library in a repository context requires the user to build the OAI responses to be sent to OAI harvesters. =head1 SYNOPSIS 1 use HTTP::OAI::Harvester; use HTTP::OAI::Metadata::OAI_DC; use XML::SAX::Writer; use XML::LibXML; # (all of these options _must_ be supplied to comply with the OAI protocol) # (protocolVersion and responseDate both have sensible defaults) my $r = new HTTP::OAI::Identify( baseURL=>'http://yourhost/cgi/oai', adminEmail=>'youremail@yourhost', repositoryName=>'agoodname', requestURL=>self_url() ); # Include a description (an XML::LibXML Dom object) $r->description(new HTTP::OAI::Metadata(dom=>$dom)); my $r = HTTP::OAI::GetRecord->new( header=>HTTP::OAI::Header->new( identifier=>'oai:myrepo:10', datestamp=>'2004-10-01' ), metadata=>HTTP::OAI::Metadata::OAI_DC->new( dc=>{title=>['Hello, World!'],description=>['My Record']} ) ); $r->about(HTTP::OAI::Metadata->new(dom=>$dom)); my $writer = XML::SAX::Writer->new(); $r->set_handler($writer); $r->generate; =head1 Building an OAI compliant repository The validation scripts included in this module provide the repository admin with a number of tools for helping with being OAI compliant, however they can not be exhaustive in themselves. =head1 METHODS =over 4 =item $r = HTTP::OAI::Repository::validate_request(%paramlist) =item $r = HTTP::OAI::Repository::validate_request_2_0(%paramlist) These functions, exported by the Repository module, validate an OAI request against the protocol requirements. Returns an L object, with the code set to 200 if the request is well-formed, or an error code and the message set. e.g: my $r = validate_request(%paramlist); print header(-status=>$r->code.' '.$r->message), $r->error_as_HTML; Note that validate_request attempts to be as strict to the Protocol as possible. =item $b = HTTP::OAI::Repository::validate_date($date) =item $b = HTTP::OAI::Repository::validate_metadataPrefix($mdp) =item $b = HTTP::OAI::Repository::validate_responseDate($date) =item $b = HTTP::OAI::Repository::validate_setSpec($set) These functions, exported by the Repository module, validate the given type of OAI data. Returns true if the given value is sane, false otherwise. =back =head1 EXAMPLE See the bin/gateway.pl for an example implementation (it's actually for creating a static repository gateway, but you get the idea!).