package MARC::XML;

use Carp;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $XDEBUG $XTEST);

require 5.004;
require Exporter;
use MARC 1.07;
use XML::Parser 2.27;

$VERSION   = 0.4;
$XDEBUG    = 0;    # true: print verbose diagnostics
$XTEST     = 0;    # true: suppress the warnings issued through xcarp()
@ISA       = qw(Exporter MARC);
@EXPORT    = qw();
@EXPORT_OK = qw();

#### Not using these yet

#### %EXPORT_TAGS = (USTEXT => [qw( marc2ustext )]);
#### Exporter::export_ok_tags('USTEXT');
#### $EXPORT_TAGS{ALL} = \@EXPORT_OK;

# Warn from the caller's perspective (Carp::carp) unless $XTEST is set.
sub xcarp {
    Carp::carp(@_) unless $XTEST;
}

####################################################################
# variables used in subroutines called by parser                   #
# not currently per-object, so one XML conversion at a time        #
####################################################################

my $count;            # records seen since the counter was last reset
my $field;            # tag of the <field> currently being parsed
my @subfields;        # (code, value, code, value, ...) of the current field
my $subfield;         # code of the <subfield> currently being parsed
my $i1;               # first indicator of the current field
my $i2;               # second indicator of the current field
my $fieldvalue;       # character data collected for the current field
my $subfieldvalue;    # character data collected for the current subfield
my $recordnum;        # record number returned by createrecord()
my $marc_obj;         # MARC object that receives the parsed records
my $reorder;          # passed as the "ordered" option of addfield()
my $enthash;          # ref to entity decoding hash

####################################################################
# templates used to output headers                                 #
####################################################################

# sprintf templates used by output().
# $head1 receives (encoding, standalone); $head2 receives (lineterm, dtd file).
# Both had lost their markup, so sprintf produced an empty header.
my $head1 = '<?xml version="1.0" encoding="%s" standalone="%s"?>';
my $head2 = '%s<!DOCTYPE marc SYSTEM "%s">';

####################################################################
# handlers for the XML elements, the so called "subs style".       #
####################################################################

# Start handler for <record>: count the record.
sub record {
    $count++;
}

# Start handler for <field>: remember the tag and, for tags above 9,
# the two indicator attributes.
sub field {
    my ($expat, $el, %atts) = @_;
    $field = $atts{'type'};
    if ($field > 9) {
        $i1 = $atts{i1};
        $i2 = $atts{i2};
    }
}

# End handler for <field>: hand the collected data to the MARC object.
# Tag "000" carries the leader and creates a new record; tags below 10
# are added with a single value; all others are added with indicators
# and the collected subfield list. Per-field state is cleared afterwards.
sub field_ {
    my ($expat, $el) = @_;
    if ($field eq "000") {
        $recordnum = $marc_obj->createrecord({leader => $fieldvalue});
    }
    elsif ($field < 10) {
        $marc_obj->addfield({
            record  => $recordnum,
            field   => $field,
            ordered => $reorder,
            value   => [$fieldvalue],
        });
    }
    else {
        $marc_obj->addfield({
            record  => $recordnum,
            field   => $field,
            ordered => $reorder,
            i1      => $i1,
            i2      => $i2,
            value   => [@subfields],
        });
    }
    $field      = undef;
    $i1         = undef;
    $i2         = undef;
    $fieldvalue = undef;
    @subfields  = ();
}

# Start handler for <subfield>: remember the subfield code.
sub subfield {
    my ($expat, $el, %atts) = @_;
    $subfield = $atts{type};
}

# End handler for <subfield>: store the (code, value) pair.
sub subfield_ {
    my ($expat, $el) = @_;
    push(@subfields, $subfield, $subfieldvalue);
    $subfield      = undef;
    $subfieldvalue = undef;
}

# Char handler: append text to the open subfield, else to the open field.
# The tests use defined() because a subfield code of "0" is false in
# boolean context and its text would otherwise land in the field value.
sub handle_char {
    my ($expat, $string) = @_;
    if (defined $subfield) {
        $subfieldvalue .= $string;
    }
    elsif (defined $field) {
        $fieldvalue .= $string;
    }
}

# ExternEnt handler. A system identifier found in the entity decoding
# hash is replaced by the mapped character(s) and an empty string is
# returned to the parser; any other identifier is opened as a file and
# the filehandle is returned for the parser to read.
sub handle_extent {
    my ($p, $base, $sys, $pub) = @_;
    print "handle_extent: $base, $sys, $pub\n" if ($XDEBUG);

    if (exists $$enthash{$sys}) {
        if (defined $subfield) {
            $subfieldvalue .= $$enthash{$sys};
        }
        elsif (defined $field) {
            $fieldvalue .= $$enthash{$sys};
        }
        return "";
    }

    # $sys comes from the document being parsed. sysopen() treats it
    # strictly as a path, whereas two-argument open() would honour
    # pipe and mode characters embedded in it.
    require Fcntl;
    local(*FOO);
    sysopen(FOO, $sys, Fcntl::O_RDONLY())
        or die "Couldn't open entity $sys";
    return *FOO;
}

####################################################################
# new() is the constructor method for MARC::XML. new() takes two   #
# arguments which are used to automatically read in the entire     #
# contents of an XML file. If a format other than "xml" is         #
# specified then the MARC.pm new() constructor is called.          #
####################################################################

# Constructor.
#   $file    - optional name of the file to load
#   $format  - optional format of $file, "xml" when omitted
#   $reorder - optional, XML input only: passed as the "ordered" option
#              of every addfield() call, "n" when omitted
# XML input is read completely through _readxml(). Any other format (or
# no file at all) is delegated to the parent constructor.
# Returns the object, or nothing when the XML file does not exist or
# the parent constructor returns no object.
sub new {
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $file  = shift;
    my $format;
    if ($file) {
        $format = shift || "xml";
    }

    my $marc;
    if ($file and $format =~ /xml$/i) {
        $marc = $class->SUPER::new();
        $reorder = shift || "n";
        unless (-e $file) {
            xcarp("File $file doesn't exist");
            return;
        }
        _readxml($marc, $file);
    }
    else {
        $marc = $class->SUPER::new($file, $format);
    }

    # bless() dies on a non-reference, so pass a parent failure through.
    return unless ref $marc;
    bless($marc, $class);
    return $marc;
}

####################################################################
# simple wrapper methods to simplify outputting                    #
# just pass through all parameters except format                   #
####################################################################

# output() with the format forced to "xml_header".
sub output_header {
    my ($marc, $params) = @_;
    $params->{'format'} = "xml_header";
    return output($marc, $params);
}

# output() with the format forced to "xml_body".
sub output_body {
    my ($marc, $params) = @_;
    $params->{'format'} = "xml_body";
    return output($marc, $params);
}

# output() with the format forced to "xml_footer".
sub output_footer {
    my ($marc, $params) = @_;
    $params->{'format'} = "xml_footer";
    return output($marc, $params);
}

####################################################################
# the output() method overloads the MARC::output method and allows #
# the user to output a MARC object as XML to a file or into a      #
# variable. If the format parameter is not used "xml" is assumed,  #
# and if the format is declared but it doesn't match "xml",        #
# "xml_header", "xml_body", or "xml_footer" then the output        #
# command is passed up to the MARC package to see what can be done #
# with it there.                                                   #
####################################################################

# Build the XML declaration, the optional DOCTYPE line and the opening
# <marc> root tag from the header templates.
sub _xml_header {
    my ($basecode, $stand, $dtd, $newline) = @_;
    my $header = sprintf $head1, $basecode, $stand;
    $header .= sprintf $head2, $newline, $dtd if ($dtd);
    $header .= "$newline<marc>$newline$newline";
    return $header;
}

# Parameters (hash reference):
#   file       - output file name including its ">" or ">>" prefix
#   format     - "xml" (default), "xml_header", "xml_body", "xml_footer";
#                anything else is passed to the parent output()
#   lineterm   - line terminator, "\n" when omitted
#   encoding   - encoding named in the XML declaration, "US-ASCII" when omitted
#   dtd_file   - when given, a DOCTYPE line naming it is written
#   standalone - value for the XML declaration; when omitted it is "no"
#                if dtd_file is given and "yes" otherwise
#   records, charset - used by _marc2xml()
# Returns 1 after writing a file, nothing when the file cannot be
# opened, or the generated text when no file was requested.
sub output {
    my ($marc, $params) = @_;
    my $newline  = $params->{lineterm} || "\n";
    my $basecode = $params->{encoding} || "US-ASCII";
    my $dtd      = $params->{dtd_file} || "";
    # The parentheses matter: "||" binds tighter than "?:", so without
    # them a caller-supplied standalone value was never used.
    my $stand    = $params->{standalone} || ($dtd ? "no" : "yes");
    my $output   = "";

    unless (exists $params->{'format'}) {
        $params->{'format'} = "xml";
        $params->{lineterm} = $newline;
    }

    my $format = $params->{'format'};
    if ($format =~ /xml$/i) {
        $output  = _xml_header($basecode, $stand, $dtd, $newline);
        $output .= _marc2xml($marc, $params);
        $output .= "</marc>$newline";
    }
    elsif ($format =~ /xml_header$/i) {
        $output = _xml_header($basecode, $stand, $dtd, $newline);
    }
    elsif ($format =~ /xml_body$/i) {
        $output = _marc2xml($marc, $params);
    }
    elsif ($format =~ /xml_footer$/i) {
        $output = "</marc>$newline";
    }
    else {
        return $marc->SUPER::output($params);
    }

    # no file name: hand the text back so it can be grabbed
    return $output unless $params->{file};

    if ($params->{file} !~ /^>/) {
        carp "Don't forget to use > or >>: $!";
    }
    # The mode prefix is part of the documented file argument, hence the
    # two-argument open.
    local(*OUT);
    unless (open(OUT, "$params->{file}")) {
        carp "Couldn't open file: $!";
        return;
    }
    binmode OUT;
    print OUT $output;
    close(OUT) or carp "Couldn't close file: $!";
    return 1;
}

####################################################################
# _readxml is an internal subroutine for reading in MARC data that #
# is encoded in XML. It is called via new()                        #
# XML::Parser must be installed in your Perl library for this to   #
# work. If no records are read in an error will be generated.      #
####################################################################

# Parse the whole XML file $file into the MARC object $marc_obj.
# Returns the number of records read from this file and warns when
# that number is zero.
sub _readxml {
    $marc_obj = shift;    # must be package global
    my $file = shift;

    unless ($enthash) {
        $enthash = register_default();    # hash ref
    }

    # Start from zero so the return value and the warning below describe
    # this file only, not every file parsed so far.
    $count = 0;

    #create the parser object and parse the xml file
    my $xmlfile = XML::Parser->new(
        Style         => 'Subs',
        ParseParamEnt => 1,
        ErrorContext  => 2,
        Handlers      => {
            Char      => \&handle_char,
            ExternEnt => \&handle_extent,
        },
    );
    $xmlfile->parsefile($file);

    unless ($count) {
        carp "Error reading XML $!";
    }
    return $count;
}

####################################################################
# openxml() is a method for reading in an XML file. It takes       #
# several parameters: file (name of the xml file) ; increment      #
# which defines how many records to read in ; and charset, a       #
# reference to a charset hash used to decode xml entities          #
####################################################################

# Stores the format, increment, filehandle and a non-blocking parser in
# the object's element 0. No records are parsed here; records are read
# by nextxml(). Returns "0 but true" on success and nothing when the
# file is missing or cannot be opened.
sub openxml {
    $marc_obj = shift;    # must be package global
    my $params = shift;
    my $file = $params->{file};

    unless (defined $file and -e $file) {
        $file = '' unless defined $file;
        xcarp("File \"$file\" doesn't exist");
        return;
    }

    $marc_obj->[0]{'format'} = 'xml';    #store format in object
    $count = 0;
    #store increment in the object, default is 0
    $marc_obj->[0]{'increment'} = $params->{'increment'} || 0;

    # Explicit read mode, and a checked result: the unchecked open left
    # a dead handle in the object when it failed.
    unless (open(*file, "< $file")) {
        xcarp("Couldn't open file \"$file\": $!");
        return;
    }
    binmode *file;
    $marc_obj->[0]{'handle'} = \*file;    #store filehandle in object

    if (exists $params->{charset}) {
        $enthash = $params->{charset};    # hash ref
    }
    else {
        unless ($enthash) {
            $enthash = register_default();    # hash ref
        }
    }

    my $p = XML::Parser->new(
        Style         => 'Subs',
        ParseParamEnt => 1,
        ErrorContext  => 2,
        Handlers      => {
            Char      => \&handle_char,
            ExternEnt => \&handle_extent,
        },
    );

    # Create the non-blocking parser
    $marc_obj->[0]{'expat'} = $p->parse_start;

    print "read in $count records\n" if $XDEBUG;
    return $count || "0 but true";
}

####################################################################
# closexml() will close a file-handle that was opened with         #
# openxml()                                                        #
####################################################################

# Resets the stored increment, closes the filehandle, finishes the
# parser and clears both from the object. Returns the result of
# close(), or nothing (with a warning) when no handle is stored.
sub closexml {
    my $marc = shift;
    $marc->[0]{'increment'} = 0;
    if (not($marc->[0]{'handle'})) {
        xcarp("There isn't a MARC file to close");
        return;
    }
    my $ok = close $marc->[0]{'handle'};
    $marc->[0]{'expat'}->parse_done;
    $marc->[0]{'handle'} = undef;
    $marc->[0]{'expat'}  = undef;
    return $ok;
}

####################################################################
# nextxml() will read in more records from a file that has         #
# already been opened with openxml(). the increment can be         #
# adjusted if necessary by passing a new value as a parameter. the #
# new records will be APPENDED to the MARC object                  #
####################################################################

# $increment is optional: a value of -1 reads to the end of the file,
# any other value stops once that many records have been counted. When
# omitted, the increment stored in the object is used.
# Returns the number of records counted during this call.
sub nextxml {
    $marc_obj = shift;
    my $increment = shift;

    my $handle = $marc_obj->[0]{'handle'};
    if (not $handle) {
        xcarp("There isn't a MARC file open");
        return;
    }

    # An omitted argument used to overwrite the stored increment with
    # undef, after which no record was read at all.
    if (defined $increment) {
        $marc_obj->[0]{'increment'} = $increment;
    }
    else {
        $increment = $marc_obj->[0]{'increment'} || 0;
    }

    $count = 0;
    local $/ = "";    # paragraph mode: read blank-line separated chunks
    while ($increment == -1 or $count < $increment) {
        my $record = <$handle>;
        last unless defined $record;    # a chunk holding only "0" is still data
        $marc_obj->[0]{'expat'}->parse_more($record);
    }
    return $count;
}

# Returns a reference to the default entity decoding hash: entity name
# => the single character it stands for.
sub register_default {
    # upper-register entities (8-bit to 7-bit), named "xNN" after their code
    my @hexchar = (0x80..0x8c, 0x8f..0xa0, 0xaf, 0xbb,
                   0xbe, 0xbf, 0xc7..0xdf, 0xfc, 0xfd, 0xff);
    my %inchar = map { sprintf("x%2.2X", int $_), chr($_) } @hexchar;

    my %code_of = (
        joiner   => 0x8d,    # zero width joiner
        nonjoin  => 0x8e,    # zero width non-joiner
        Lstrok   => 0xa1,    # latin capital letter l with stroke
        Ostrok   => 0xa2,    # latin capital letter o with stroke
        Dstrok   => 0xa3,    # latin capital letter d with stroke
        THORN    => 0xa4,    # latin capital letter thorn (icelandic)
        AElig    => 0xa5,    # latin capital letter AE
        OElig    => 0xa6,    # latin capital letter OE
        softsign => 0xa7,    # modifier letter soft sign
        middot   => 0xa8,    # middle dot
        flat     => 0xa9,    # musical flat sign
        reg      => 0xaa,    # registered sign
        plusmn   => 0xab,    # plus-minus sign
        Ohorn    => 0xac,    # latin capital letter o with horn
        Uhorn    => 0xad,    # latin capital letter u with horn
        mlrhring => 0xae,    # modifier letter right half ring (alif)
        mllhring => 0xb0,    # modifier letter left half ring (ayn)
        lstrok   => 0xb1,    # latin small letter l with stroke
        ostrok   => 0xb2,    # latin small letter o with stroke
        dstrok   => 0xb3,    # latin small letter d with stroke
        thorn    => 0xb4,    # latin small letter thorn (icelandic)
        aelig    => 0xb5,    # latin small letter ae
        oelig    => 0xb6,    # latin small letter oe
        hardsign => 0xb7,    # modifier letter hard sign
        inodot   => 0xb8,    # latin small letter dotless i
        pound    => 0xb9,    # pound sign
        eth      => 0xba,    # latin small letter eth
        ohorn    => 0xbc,    # latin small letter o with horn
        uhorn    => 0xbd,    # latin small letter u with horn
        deg      => 0xc0,    # degree sign
        scriptl  => 0xc1,    # latin small letter script l
        phono    => 0xc2,    # sound recording copyright
        copy     => 0xc3,    # copyright sign
        sharp    => 0xc4,    # sharp
        iquest   => 0xc5,    # inverted question mark
        iexcl    => 0xc6,    # inverted exclamation mark
        hooka    => 0xe0,    # combining hook above
        grave    => 0xe1,    # combining grave
        acute    => 0xe2,    # combining acute
        circ     => 0xe3,    # combining circumflex
        tilde    => 0xe4,    # combining tilde
        macr     => 0xe5,    # combining macron
        breve    => 0xe6,    # combining breve
        dot      => 0xe7,    # combining dot above
        diaer    => 0xe8,    # combining diaeresis
        uml      => 0xe8,    # combining umlaut
        caron    => 0xe9,    # combining hacek
        ring     => 0xea,    # combining ring above
        llig     => 0xeb,    # combining ligature left half
        rlig     => 0xec,    # combining ligature right half
        rcommaa  => 0xed,    # combining comma above right
        dblac    => 0xee,    # combining double acute
        candra   => 0xef,    # combining candrabindu
        cedil    => 0xf0,    # combining cedilla
        ogon     => 0xf1,    # combining ogonek
        dotb     => 0xf2,    # combining dot below
        dbldotb  => 0xf3,    # combining double dot below
        ringb    => 0xf4,    # combining ring below
        dblunder => 0xf5,    # combining double underscore
        under    => 0xf6,    # combining underscore
        commab   => 0xf7,    # combining comma below
        rcedil   => 0xf8,    # combining right cedilla
        breveb   => 0xf9,    # combining breve below
        ldbltil  => 0xfa,    # combining double tilde left half
        rdbltil  => 0xfb,    # combining double tilde right half
        commaa   => 0xfe,    # combining comma above
    );
    foreach my $name (keys %code_of) {
        $inchar{$name} = chr($code_of{$name});
    }

    if ($XDEBUG) {
        foreach my $str (sort keys %inchar) {
            printf "%s = %x\n", $str, ord($inchar{$str});
        }
    }
    return \%inchar;
}

####################################################################
# _marc2xml takes a MARC object as its input and converts it into  #
# XML.
# The XML is returned as a string                                  #
####################################################################

# $marc   - MARC object; records are elements 1..N, each holding its
#           fields under the {array} key
# $params - hash reference: lineterm (default "\n"), records (array
#           reference of record numbers, default all), charset
#           (character => replacement text, default ansel_default())
# The element and attribute names written here are the ones the parser
# handlers read back: record, field (type, i1, i2), subfield (type).
sub _marc2xml {
    my ($marc, $params) = @_;
    my $output = "";
    my $newline = $params->{lineterm} || "\n";

    unless (exists $params->{charset}) {
        unless (exists $marc->[0]{xmlchar}) {
            $marc->[0]{xmlchar} = ansel_default();    # hash ref
        }
        $params->{charset} = $marc->[0]{xmlchar};
    }
    my $charset = $params->{charset};

    my @records;
    if ($params->{records}) {
        @records = @{$params->{records}};
    }
    else {
        @records = (1 .. $#$marc);
    }

    foreach my $i (@records) {    #cycle through each record
        my $recout = $marc->[$i];
        $output .= "<record>$newline";
        foreach my $fields (@{$recout->{array}}) {    #cycle through each field
            my $tag = $fields->[0];
            if ($tag < 10) {    #no indicators or subfields
                my $value = _char2xml($fields->[1], $charset);
                $output .= qq(<field type="$tag">$value</field>$newline);
            }
            else {    #indicators and subfields
                $output .= qq(<field type="$tag" i1="$fields->[1]" i2="$fields->[2]">$newline);
                my @subfldout = @{$fields}[3..$#{$fields}];
                while (@subfldout) {    #cycle through subfields
                    my $subfield_type  = shift(@subfldout);
                    my $subfield_value = _char2xml(shift(@subfldout), $charset);
                    $output .= qq(   <subfield type="$subfield_type">);
                    $output .= qq($subfield_value</subfield>$newline);
                }
                $output .= qq(</field>$newline);
            }
        }
        #put an extra newline to separate records
        $output .= "</record>$newline$newline";
    }
    return $output;
}

# Translate a string one character at a time through the map
# $charmap (hash reference). Characters missing from the map
# contribute nothing to the result.
sub _char2xml {
    my @marc_string = split(//, shift);
    my $charmap = shift;
    local $^W = 0;    # no warnings
    # the simple case only works for single byte entities
    my $xml_string = join('', map { ${$charmap}{$_} } @marc_string);
    return $xml_string;
}

# Returns a reference to the default output map: character => the text
# written for it. The named entities are the inverse of the table in
# register_default(); the literal characters that had replaced them did
# not round-trip, left markup characters unescaped, and the apostrophe
# entry was not even valid Perl.
sub ansel_default {
    # characters written as "&xNN;" after their code
    my @hexchar = (0x00..0x08, 0x0b, 0x0c, 0x0e..0x1f, 0x80..0x8c, 0x8f..0xa0,
                   0xaf, 0xbb, 0xbe, 0xbf, 0xc7..0xdf, 0xfc, 0xfd, 0xff);
    my %outchar = map { chr($_), sprintf("&x%2.2X;", int $_) } @hexchar;

    # characters written unchanged
    my @ascchar = map { chr($_) } (0x09, 0x0a, 0x0d, 0x20, 0x21, 0x23..0x25,
                                   0x28..0x3b, 0x3d, 0x3f..0x7f);
    foreach my $asc (@ascchar) {
        $outchar{$asc} = $asc;
    }

    my %entity_for = (
        0x22 => 'quot',        # quotation
        0x26 => 'amp',         # ampersand
        0x27 => 'apos',        # apostrophe
        0x3c => 'lt',          # less than
        0x3e => 'gt',          # greater than
        0x8d => 'joiner',      # zero width joiner
        0x8e => 'nonjoin',     # zero width non-joiner
        0xa1 => 'Lstrok',      # latin capital letter l with stroke
        0xa2 => 'Ostrok',      # latin capital letter o with stroke
        0xa3 => 'Dstrok',      # latin capital letter d with stroke
        0xa4 => 'THORN',       # latin capital letter thorn (icelandic)
        0xa5 => 'AElig',       # latin capital letter AE
        0xa6 => 'OElig',       # latin capital letter OE
        0xa7 => 'softsign',    # modifier letter soft sign
        0xa8 => 'middot',      # middle dot
        0xa9 => 'flat',        # musical flat sign
        0xaa => 'reg',         # registered sign
        0xab => 'plusmn',      # plus-minus sign
        0xac => 'Ohorn',       # latin capital letter o with horn
        0xad => 'Uhorn',       # latin capital letter u with horn
        0xae => 'mlrhring',    # modifier letter right half ring (alif)
        0xb0 => 'mllhring',    # modifier letter left half ring (ayn)
        0xb1 => 'lstrok',      # latin small letter l with stroke
        0xb2 => 'ostrok',      # latin small letter o with stroke
        0xb3 => 'dstrok',      # latin small letter d with stroke
        0xb4 => 'thorn',       # latin small letter thorn (icelandic)
        0xb5 => 'aelig',       # latin small letter ae
        0xb6 => 'oelig',       # latin small letter oe
        0xb7 => 'hardsign',    # modifier letter hard sign
        0xb8 => 'inodot',      # latin small letter dotless i
        0xb9 => 'pound',       # pound sign
        0xba => 'eth',         # latin small letter eth
        0xbc => 'ohorn',       # latin small letter o with horn
        0xbd => 'uhorn',       # latin small letter u with horn
        0xc0 => 'deg',         # degree sign
        0xc1 => 'scriptl',     # latin small letter script l
        0xc2 => 'phono',       # sound recording copyright
        0xc3 => 'copy',        # copyright sign
        0xc4 => 'sharp',       # sharp
        0xc5 => 'iquest',      # inverted question mark
        0xc6 => 'iexcl',       # inverted exclamation mark
        0xe0 => 'hooka',       # combining hook above
        0xe1 => 'grave',       # combining grave
        0xe2 => 'acute',       # combining acute
        0xe3 => 'circ',        # combining circumflex
        0xe4 => 'tilde',       # combining tilde
        0xe5 => 'macr',        # combining macron
        0xe6 => 'breve',       # combining breve
        0xe7 => 'dot',         # combining dot above
        0xe8 => 'uml',         # combining diaeresis (umlaut)
        0xe9 => 'caron',       # combining hacek
        0xea => 'ring',        # combining ring above
        0xeb => 'llig',        # combining ligature left half
        0xec => 'rlig',        # combining ligature right half
        0xed => 'rcommaa',     # combining comma above right
        0xee => 'dblac',       # combining double acute
        0xef => 'candra',      # combining candrabindu
        0xf0 => 'cedil',       # combining cedilla
        0xf1 => 'ogon',        # combining ogonek
        0xf2 => 'dotb',        # combining dot below
        0xf3 => 'dbldotb',     # combining double dot below
        0xf4 => 'ringb',       # combining ring below
        0xf5 => 'dblunder',    # combining double underscore
        0xf6 => 'under',       # combining underscore
        0xf7 => 'commab',      # combining comma below
        0xf8 => 'rcedil',      # combining right cedilla
        0xf9 => 'breveb',      # combining breve below
        0xfa => 'ldbltil',     # combining double tilde left half
        0xfb => 'rdbltil',     # combining double tilde right half
        0xfe => 'commaa',      # combining comma above
    );
    foreach my $code (keys %entity_for) {
        $outchar{chr($code)} = '&' . $entity_for{$code} . ';';
    }

    if ($XDEBUG) {
        foreach my $num (sort keys %outchar) {
            printf "%x = %s\n", ord($num), $outchar{$num};
        }
    }
    return \%outchar;
}

1;

__END__
####################################################################
#                  D O C U M E N T A T I O N                       #
####################################################################

=pod

=head1 NAME

MARC::XML - A subclass of MARC.pm to provide XML support.

=head1 SYNOPSIS

    use MARC::XML;

    #read in some MARC and output some XML
    $myobject = MARC::XML->new("marc.mrc","usmarc");
    $myobject->output({file=>">marc.xml",format=>"xml"});

    #read in some XML and output some MARC
    $myobject = MARC::XML->new("marc.xml","xml");
    $myobject->output({file=>">marc.mrc",format=>"usmarc"});

=head1 DESCRIPTION

MARC::XML is a subclass of MARC.pm which provides methods for round-trip
conversions between MARC and XML. MARC::XML requires that you have the
CPAN modules MARC.pm and XML::Parser installed in your Perl library.
Version 1.07 of MARC.pm and 2.27 of XML::Parser (or later) are required.
As a subclass of MARC.pm a MARC::XML object will by default have the full
functionality of a MARC.pm object. See the MARC.pm documentation for details.

The XML file that is read and generated by MARC::XML is not associated with a
Document Type Definition (DTD). This means that your files need to be
well-formed, but they will not be validated. When performing XML->MARC
conversion it is important that the XML file is structured in a particular
way. Fortunately, this is the same format that is generated by the MARC->XML
conversion, so you should be able to move your data easily between the two
formats.

=head2 Downloading and Installing

=over 4

=item Download

First make sure that you have B<MARC.pm> and B<XML::Parser> installed.
Both Perl extensions are available from the CPAN
http://www.cpan.org/modules/by-module, and they must be available in
your Perl library for MARC::XML to work properly.

MARC::XML is provided in standard CPAN distribution format. Download the
latest version from http://www.cpan.org/modules/by-module/MARC/XML. It will
extract into a directory MARC-XML-version with any necessary subdirectories.
Once you have extracted the archive Change into the MARC-XML top directory and execute the following command depending on your platform. =item Unix perl Makefile.PL make make test make install =item Win9x/WinNT/Win2000 perl Makefile.PL perl test.pl perl install.pl =item Test Once you have installed, you can check if Perl can find it. Change to some other directory and execute from the command line: perl -e "use MARC::XML" If you B get any response that means everything is OK! If you get an error like I. then Perl is not able to find MARC::XML--double check that the file copied it into the right place during the install. =back =head2 Todo =over 4 =item * Checking for field and record lengths to make sure that data read in from an XML file does not exceed the limited space available in a MARC record. =item * Support for MARC E-E Unicode character conversions. =item * MARC E-E EAD (Encoded Archival Description) conversion? =item * Support for MARC E-E DC/RDF (Dublin Core Metadata encoded in the Resource Description Framework)? =item * Support for MARC E-E FGDC Metadata (Federal Geographic Data Committee) conversion? =back =head2 Web Interface A web interface to MARC.pm and MARC::XML is available at http://libstaff.lib.odu.edu/cgi-bin/marc.cgi where you can upload records and observe the results. If you'd like to check out the cgi script take a look at http://libstaff.lib.odu.edu/depts/systems/iii/scripts/MARCpm/marc-cgi.txt However, to get the full functionality you will want to install MARC.pm and MARC::XML on your server or PC. =head2 Sample XML file Below is an example of the flavor of XML that MARC::XML will generate and read. There are only four elements: the ImarcE> pair that serves as the root for the file; the IrecordE> pair that encloses each record; the IfieldE> pair which encloses each field; and the IsubfieldE> pair which encloses each subfield. 
In addition the IfieldE> and IsubfieldE> tags have three possible attributes: I which defines the specific tag or subfield ; as well as I and I which allow you to define the indicators for a specific tag. 00901cam 2200241Ia 45e0 ocm01047729 OCoLC 19990808143752.0 741021s1884 enkaf 000 1 eng d KSU KSU GZM PS1305 .A1 1884 VODN Twain, Mark, 1835-1910. The adventures of Huckleberry Finn : (Tom Sawyer's comrade) : scene, the Mississippi Valley : time, forty to fifty years ago / by Mark Twain (Samuel Clemens) ; with 174 illustrations. London : Chatto & Windus, 1884. xvi, 438 p., [1] leaf of plates : ill. ; 20 cm. First English ed. State B; gatherings saddle-stitched with wire staples. Advertisements on p. [1]-32 at end. Bound in red S cloth; stamped in black and gold. BAL 3414. Huckleberry Finn. E0 VOD =head1 METHODS Here is a list of methods available to you in MARC::XML. =head2 new() MARC::XML overides MARC.pm's new() method to create a MARC::XML object. Similar to MARC.pm's new() it can take two arguments: a file name, and the format of the file to read in. However MARC::XML's new() gives you an extra format choice "XML" (which is also the default). Internally, the XML source is converted to a series of B and B calls. The order of MARC tags is preserved by default. But if an optional third argument is passed to new(), it is used as the I option for the B calls. Like MARC.pm, it is not possible to read only part of an XML input file using new(). Some examples: #read in an XML file called myxmlfile.xml use MARC::XML; $x = MARC::XML->new("myxmlfile.xml","xml"); $x = MARC::XML->new("needsort.xml","xml","y"); Since the full funtionality of MARC.pm is also available you can read in other types of files as well. Although new() with no arguments will create an object with no records, just like MARC.pm, XML format not supported by openmarc() and nextmarc() for input. The openxml() and nextxml() methods provide similar operation. 
And you can output from XML to a different format source.

    #read in a MARC file called mymarcfile.mrc
    use MARC::XML;
    $x = MARC::XML->new("mymarcfile.mrc","usmarc");
    $x = MARC::XML->new();

=head2 output()

MARC::XML's output() method allows you to output the MARC object as an XML
file. It takes eight arguments: I<file>, I<format>, I<records>, and
I<lineterm> have the same function as in MARC.pm. If not specified, I<format>
defaults to "xml" and I<lineterm> defaults to "\n". A I<charset> parameter
accepts a hash-reference to a user supplied character translation table.
The internal default is based on the LoC "register.sgm" table supplied with
the LoC. SGML utilities. You can use the B<ansel_default> method to get a
hash-reference to it if you only want to modify a couple of characters.
See example below.

The I<encoding>, I<dtd_file>, and I<standalone> arguments correspond to the
specified fields in an XML header. If not specified, I<standalone> defaults
to "yes" (or to "no" when a I<dtd_file> is given) and I<encoding> to
"US-ASCII". If an optional I<dtd_file> is specified, a B<DOCTYPE> declaration
naming it is added to the output.

    use MARC::XML;
    $x = MARC::XML->new("mymarcfile.mrc","usmarc");
    $x->output({file=>">myxmlfile.xml",format=>"xml"});

Or if you only want to output the first record:

    $x->output({file=>">myxmlfile.xml",format=>"xml",records=>[1]});

If you like you can also output portions of the XML file using the
I<format> options: I<xml_header>, I<xml_body>, and I<xml_footer>. Remember
to prefix your file name with a >> to append though. This example will output
record 1 twice.

    use MARC::XML;
    $x = MARC::XML->new("mymarcfile.mrc","usmarc");
    $x->output({file=>">myxmlfile.xml",format=>"xml_header"});
    $x->output({file=>">>myxmlfile.xml",format=>"xml_body",records=>[1]});
    $x->output({file=>">>myxmlfile.xml",format=>"xml_body",records=>[1]});
    $x->output({file=>">>myxmlfile.xml",format=>"xml_footer"});

Instead of outputting to a file, you can also capture the output in a
variable if you wish.

    use MARC::XML;
    $x = MARC::XML->new("mymarcfile.mrc","usmarc");
    $myxml = $x->output({format=>"xml"});

As with new() the full functionality of MARC.pm's output() method is
available to you as well.
So you could read in an XML file and then output it as ascii text:

    use MARC::XML;
    $x = MARC::XML->new("myxmlfile.xml","xml");
    $x->output({file=>">mytextfile.txt",format=>"ascii"});

=head1 NOTES

Please let us know if you run into any difficulties using MARC.pm--we'd be
happy to try to help. Also, please contact us if you notice any bugs, or
if you would like to suggest an improvement/enhancement. Email addresses
are listed at the bottom of this page.

Development of MARC.pm and other library oriented Perl utilities is conducted
on the Perl4Lib listserv. Perl4Lib is an open list and is an ideal place to
ask questions about MARC.pm. Subscription information is available at
http://www.vims.edu/perl4lib

Two global boolean variables are reserved for test and debugging. Both are
"0" (off) by default. The C<$XTEST> variable disables internal error messages
generated using I<carp>. It should only be used in the automatic test suite.
The C<$XDEBUG> variable adds verbose diagnostic messages.

=head1 EXAMPLES

The distribution includes a subdirectory that contains a few complete
examples to get you started.

=head1 AUTHORS

    Chuck Bearden    cbearden@rice.edu
    Bill Birthisel   wcbirthisel@alum.mit.edu
    Derek Lane       dereklane@pobox.com
    Charles McFadden chuck@vims.edu
    Ed Summers       ed@cheetahmail.com

=head1 SEE ALSO

perl(1), MARC.pm, MARC http://lcweb.loc.gov/marc , XML http://www.w3.org/xml .

=head1 COPYRIGHT

Copyright (C) 1999,2000, Bearden, Birthisel, Lane, McFadden, and Summers.
All rights reserved. This module is free software; you can redistribute
it and/or modify it under the same terms as Perl itself. 23 April 2000.
Portions Copyright (C) 1999,2000, Duke University, Lane.

=cut