#!/usr/bin/perl

# This example reads the output.xml file generated by pacific.pl. You
# certainly want to run that one first. With slight modifications, it
# can read other files as well - but the multi-character entity decoding
# won't be obvious (and might have adverse side-effects). Many of the
# same comments apply to this example. It reads incrementally to allow
# handling large records.

use strict;
use warnings;

# First, a few administrative details. If you run this from someplace
# other than the "eg" directory of the distribution, you will probably
# need to fix the paths.
use lib '../blib/lib';
use MARC::XML 0.3;

# Remove output from a previous run, since the loop below opens the file
# in append mode. Best effort: the file may simply not exist yet.
unlink "output2.txt";

my $m = MARC::XML->new();

# We'll create a couple of "identifiable" multi-character-output entities.
# We start with the "base" set - although a completely independent set is
# also possible. You could even do UCS-2 or UTF-8 this way. But unless you
# already have the rest of the code in those formats, this translation
# will not be a complete solution. And for UTF-8, a more efficient option
# is possible by using a different DTD file to redefine entities in the
# "internal" character map used by Expat.
my $charhash = $m->register_default;    # MARC 8-bit character set
$charhash->{softsign} = '[I am the VERY model]';
$charhash->{caron}    = '[of a VERY modern entity]';

# Setup an XML::Parser and define options. The return value seeds the
# record counter that the loop below increments.
my $count = $m->openxml({ file => "output.xml", charset => $charhash });

# NOTE(review): assumes openxml returns a defined value on success (the
# related MARC.pm open methods return "0 but true" when nothing has been
# read yet) - confirm against the MARC::XML documentation.
defined $count or die "Can't open output.xml";

# Read one record per iteration, so the object never holds more than a
# single record at a time.
while ($m->nextxml(1)) {

    # Loop printing titles.
    my ($title) = $m->getvalue({ record => '1', field => '245', subfield => 'a' });

    # A record without a 245$a still gets its (empty) line, but without
    # an "uninitialized value" warning.
    $title = '' unless defined $title;
    print "$title\n";

    # Use a different name for the text output than used by pacific.pl.
    # Hence, you can compare them.
    $m->output({ file => ">>output2.txt", format => "ascii" });

    $m->deletemarc();    # empty the object for reading in another
    $count++;
}

$m->closexml or die "Can't close";
print "\nprocessed $count records\n";