#!/usr/bin/perl

# oai-dump: extract all the records from an OAI-PMH repository and let
# Net::OAI::Harvester store the responses in a directory of your choosing
# (passed to the harvester as its dumpDir option).
#
# Usage: oai-dump oai-base-url directory
#
# Record identifiers are printed on STDOUT, one per line, as records are
# retrieved. Diagnostics (usage, errors) go to STDERR so that STDOUT stays a
# clean list of identifiers.

use strict;
use warnings;
use Net::OAI::Harvester;
use Carp qw(carp);

my ($baseURL, $dir) = @ARGV;

# Both arguments are required; bail out with a usage message otherwise.
unless ($baseURL and $dir) {
    print STDERR "usage: oai-dump oai-base-url directory\n";
    print STDERR " eg. oai-dump http://memory.loc.gov/cgi-bin/oai2_0 loc_data\n";
    exit 1;
}

# Create the directory if necessary. Fail loudly if that is not possible:
# continuing would run the harvest against a dump directory that does not exist.
unless (-d $dir) {
    mkdir $dir
        or die "oai-dump: cannot create directory '$dir': $!\n";
}

## create a harvester
my $harvester = Net::OAI::Harvester->new(
    baseURL => $baseURL,
    dumpDir => $dir,
);

## list all the records in a repository
my $records = $harvester->listAllRecords(
    'metadataPrefix' => 'oai_dc',
);

# Result objects expose errorCode()/errorString() (Net::OAI::Base). Report a
# failed initial request instead of silently printing nothing and exiting 0.
# NOTE(review): this covers the first response only; confirm how errors on
# later resumption requests surface through next().
if (my $error_code = $records->errorCode()) {
    my $error_text = $records->errorString();
    $error_text = '' unless defined $error_text;
    print STDERR "oai-dump: harvest of $baseURL failed: $error_code: $error_text\n";
    exit 1;
}

# Print each record's identifier as it arrives.
while (my $record = $records->next()) {
    print $record->header->identifier, "\n";
}