The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!perl

=head2 NAME

007 cleanup script

=head2 Description

Runs through each record in file.
Calls subroutine to check validity of each 007 value.
Reports any records needing manual correcting (outputs these to file). Cleans fields with valid values, but that are too long, and outputs these to separate file.

Both output files are USMARC/MARC21 format.

=cut

########################
### Program template ###
########################
###########################
### Initialize includes ###
### and basic needs     ###
###########################
use MARC::Batch;
use MARC::BBMARC;
use MARC::Lintadditions;
##Time coding to wrap around program to determine how long execution takes:

##########################
## Time coding routines ##
## Print start time and ##
## set start variable   ##
##########################

use Time::HiRes qw(  tv_interval );
# measure elapsed time 
my $t0 = [Time::HiRes::time()];
my $startingtime = MARC::BBMARC::startstop_time();
# do bunch of stuff here
#########################
### Start main program ##
#########################
print ("Welcome to 007 cleanup script\n");

##### File handling initialization ######
#prompt for updated file
print ("What is the input file?: ");
my $inputfile=<>;
chomp $inputfile;
$inputfile =~ s/^\"(.*)\"$/$1/;
#initialize $infile as new MARC::Batch object
my $batch = MARC::Batch->new('USMARC', "$inputfile");

print ("Export file for cleaned records\n");
print ("Export record file: ");
#read command line for name of export file
my $cleanfile= <>;
chomp $cleanfile;
$cleanfile =~ s/^\"(.*)\"$/$1/;
open(CLEANOUT, ">$cleanfile") or die "cannot open Cleanout\n";
if ($^O eq 'MacOS') {
#set creator and type to BBEdit and Text
MacPerl::SetFileInfo('R*ch', 'TEXT', $cleanfile);
}

print ("Export file for manual check records\n");
print ("Manual check record file: ");
my $badfieldfile = <>;
chomp $badfieldfile;
$badfieldfile =~ s/^\"(.*)\"$/$1/;
open(VISUALIZEOUT, ">$badfieldfile") or die "cannot open Badfieldfile\n";
if ($^O eq 'MacOS') {
#set creator and type to BBEdit and Text
MacPerl::SetFileInfo('R*ch', 'TEXT', $badfieldfile);
}

############################################
# Set start time for main calculation loop #
############################################
my $t1 = [Time::HiRes::time()];
my $runningrecordcount=0;
############################################

my $fieldscleaned = 0;
my $badbytecount = 0;
my $visualizecount = 0;
my $cleanedreccount = 0;

#loop through batch file of records
while (my $record = $batch->next()) {
#$reccleaned will be true if 007 is replaced
my $reccleaned = 0;
#$badrecord will be true if bytes have 'bad' or if data exists after limit
my $badrecord;

#look at each 007 field in record
foreach my $field ( $record->field('007') ) {
my $field007 = $field->as_string();
my @bytes = (split ('', $field007));

#clean byte[2] before passing (change 'u' or '|' to blank space)
$bytes[2] =~ s/[u|]/ /;
#call validate007 sub, which is part of Lintadditions
#The sub returns an arrayref and a scalarref
my ($arrayref007, $hasextradataref)  = MARC::Lintadditions::validate007(\@bytes);

#dereference the returned values
my @cleaned007 = @$arrayref007;
#loop through the array looking for bad bytes
for (my $i = 0 ; $i <= $#cleaned007; $i++) {
if ($cleaned007[$i] eq 'bad'){
#set marker for printing bad record
$badrecord = 1;
$badbytecount++;
} #if bad byte
} #for each byte

#check for data after valid limit
if ($$hasextradataref) {
#set marker for printing bad record
$badrecord = 1;
$visualizecount++;
}

#unless badrecord was found, replace old 007 with new
#also ignore 007s that didn't have extra spaces
unless ($badrecord) {
my $cleaned007data = join ('', @cleaned007);
#ignore good 007 fields
if ($field007 eq $cleaned007data) {next;}
else {
my $newfield = MARC::Field->new('007', $cleaned007data);
$field->replace_with($newfield);
$reccleaned = 1;
$fieldscleaned++;
} #else
} #unless

} #for each 007 field

#if bad 007 was found print it out for manual checking
if ($badrecord) {
print VISUALIZEOUT ($record->as_usmarc()); 
}
#otherwise, print cleaned record
elsif ($reccleaned) {print CLEANOUT ($record->as_usmarc());
$cleanedreccount++;}

#####################################
## Place the following within loop ##
#####################################
$runningrecordcount++;
MARC::BBMARC::counting_print ($runningrecordcount);
#####################################
} #while records are in infile

print "$badbytecount total bytes are bad\n";
print "$visualizecount fields have data after limit\n";
print "$fieldscleaned fields were cleaned in $cleanedreccount records\n";

##########################
### Main program done.  ##
### Report elapsed time.##
##########################
my $elapsed = tv_interval ($t0);
my $calcelapsed = tv_interval ($t1);
print sprintf ("%.4f %s\n", "$elapsed", "seconds from execution\n");
print sprintf ("%.4f %s\n", "$calcelapsed", "seconds to calculate\n");
my $endingtime = MARC::BBMARC::startstop_time();
print "Started at $startingtime\nEnded at $endingtime";
print "Press Enter to quit";
<>;

#####################
### END OF PROGRAM ##
#####################

=head1 LICENSE

This code may be distributed under the same terms as Perl itself. 

Please note that this code is not a product of or supported by the 
employers of the various contributors to the code.

=head1 AUTHOR

Bryan Baldus
eija [at] inwave [dot] com

Copyright (c) 2003-2004

=cut