# $Id: 4_workout.t,v 1.3 2005/04/20 20:03:53 grantm Exp $ use strict; use Test::More; BEGIN { # Seems to be required by older Perls unless(eval { require XML::SAX::Writer }) { plan skip_all => 'XML::SAX::Writer not installed'; } unless(eval { require XML::SAX::ParserFactory }) { plan skip_all => 'XML::SAX::ParserFactory not installed'; } unless(eval { require XML::SAX::Machines }) { plan skip_all => 'XML::SAX::Machines not installed'; } } plan tests => 26; use XML::Filter::Sort; use XML::SAX::Machines qw( :all ); $^W = 1; my(@opts, $xmlin, $xmlout, $sorter); ############################################################################## # Global used to flag disk rather than memory buffering # @main::TempOpts = () unless(@main::TempOpts); ############################################################################## # Sort using full text content as key (including leading digits) # $xmlin = q( 1Zebedee 2Yorick 3Wayne 4Xavier ); $xmlout = ''; @opts = (Record => 'person'); push @opts, @main::TempOpts; $sorter = Pipeline( XML::Filter::Sort->new(@opts) => \$xmlout ); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, $xmlin, 'Default key to full text content, alpha, asc'); ############################################################################## # Sort using text content of specified child element as a key # $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 3Wayne 4Xavier 2Yorick 1Zebedee ), 'Parsed key from string and extracted element content'); ############################################################################## # Check that a 'foreign' element in the middle of a sequence of records # causes the records before and the records after to be sorted as two # independent lists. # $xmlin = q( 1Zebedee 2Yorick popcorn 3Wayne 4Xavier 0 ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 2Yorick 1Zebedee popcorn 3Wayne 4Xavier 0 ), 'Sorted two independent lists (element between)'); ############################################################################## # Check that non-whitespace text causes the same effect. # $xmlin = q( 1Zebedee 2Yorick popcorn 3Wayne 4Xavier ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 2Yorick 1Zebedee popcorn 3Wayne 4Xavier ), 'Sorted two independent lists (text between - easy case)'); ############################################################################## # Repeat that last test with slightly different input data to expose a flaw # which probably ought to be fixed. # $xmlin = q( 1Zebedee 2Yorick popcorn 4Xavier 3Wayne ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); TODO: { local $TODO = 'Trailing whitespace on leading text not quite done'; is($xmlout, q( 2Yorick 1Zebedee popcorn 3Wayne 4Xavier ), 'Sorted two independent lists (text between - pathological case)'); } ############################################################################## # Now do a similar test with a comment separating the two record lists. # $xmlin = q( 1Zebedee 2Yorick 4Xavier 3Wayne ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 2Yorick 1Zebedee 3Wayne 4Xavier ), 'Sorted two independent lists (comment between)'); ############################################################################## # Same again but with a processing instruction separating the two record lists. # $xmlin = q( 1Zebedee 2Yorick 4Xavier 3Wayne ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 2Yorick 1Zebedee 3Wayne 4Xavier ), 'Sorted two independent lists (PI between)'); ############################################################################## # Check that as each record is buffered, reordered and spat back out, it # retains its own leading whitespace. # $xmlin = q( Zebedee Yorick Wayne Xavier ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( Wayne Xavier Yorick Zebedee ), 'Funky indentation preserved'); ############################################################################## # Throw a namespace definition into the mix and confirm it is ignored. # $xmlin = q( 1Zebedee 2Yorick 3Wayne 4Xavier ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 3Wayne 4Xavier 2Yorick 1Zebedee ), 'Record selection with optional namespace works'); ############################################################################## # Now sort only the records with no namespace # $xmlin = q( 1Zebedee 2Yorick 3Wayne 4Xavier 5Vernon 6Trevor 7Ulbrecht ); $xmlout = ''; @opts = ( Record => '{}person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 1Zebedee 3Wayne 2Yorick 4Xavier 6Trevor 7Ulbrecht 5Vernon ), 'Record selection with no namespace works'); ############################################################################## # Now sort only the records with specified namespace # $xmlin = q( 1Zebedee 2Yorick 3Wayne 4Xavier 5Vernon 6Trevor 7Ulbrecht ); $xmlout = ''; @opts = ( Record => '{bob.com}person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 3Wayne 2Yorick 1Zebedee 4Xavier 5Vernon 6Trevor 7Ulbrecht ), 'Record selection with specific namespace works'); ############################################################################## # Put some comments into the mix and confirm they are handled correctly. # $xmlin = q( Zebedee Xavier Yorick ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( Xavier Yorick Zebedee ), 'Buffering of comments works'); ############################################################################## # Do the same with processing instructions. # $xmlin = q( Zebedee Xavier Yorick ); $xmlout = ''; @opts = ( Record => 'person', Keys => 'firstname', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( Xavier Yorick Zebedee ), 'Buffering of PIs works'); ############################################################################## # Run a multi-key sort - two alpha keys. # $xmlin = q( This is a list of names & ages Zebedee Boozle Yorick Cabbage Yorick Cabbage Xavier Aardvark ); $xmlout = ''; @opts = ( Record => 'person', Keys => ' lastname firstname @age ', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( This is a list of names & ages Xavier Aardvark Zebedee Boozle Yorick Cabbage Yorick Cabbage ), 'Multi-element records and multi-key sort OK'); ############################################################################## # Introduce a third sort key - numeric. # $xmlout = ''; @opts = ( Record => 'person', Keys => ' lastname, alpha, asc firstname, alpha, asc @age, num, asc ', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( This is a list of names & ages Xavier Aardvark Zebedee Boozle Yorick Cabbage Yorick Cabbage ), 'Numeric sort key OK'); ############################################################################## # Check that descending order works for both alpha and numeric sorts # $xmlout = ''; @opts = ( Record => 'person', Keys => ' firstname, alpha, desc lastname, alpha, asc @age, num, desc ', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( This is a list of names & ages Zebedee Boozle Yorick Cabbage Yorick Cabbage Xavier Aardvark ), 'Descending order OK'); ############################################################################## # Use a code reference rather than alpha or numeric comparator # $xmlin = q( QX54763 AS87645 YT19895 RS04198 ); $xmlout = ''; @opts = ( Record => 'part', Keys => [ [ '.' => sub { my @nums = map { /(\d+)/ } @_; $nums[0] <=> $nums[1]; } ] ] ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( RS04198 YT19895 QX54763 AS87645 ), 'Coderef comparator OK'); ############################################################################## # Test that by default case of keys is significant # $xmlin = q( red Green blue ); $xmlout = ''; @opts = ( Record => 'colour', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( Green blue red ), 'Case is significant by default'); ############################################################################## # But the IgnoreCase option fixes that # $xmlout = ''; @opts = ( Record => 'colour', IgnoreCase => 1, ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( blue Green red ), 'IgnoreCase makes case insignificant'); ############################################################################## # Test that by default space in keys is significant # $xmlin = q( red green blue light blue light blue light blue light blue ); $xmlout = ''; @opts = ( Record => 'colour', ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( blue light blue light blue light blue red green light blue ), 'Space is significant by default'); ############################################################################## # But the NormaliseKeySpace option fixes that # $xmlout = ''; @opts = ( Record => 'colour', NormaliseKeySpace => 1, ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( blue green light blue light blue light blue light blue red ), 'NormaliseKeySpace makes spaces insignificant'); ############################################################################## # And it fixes it for Americanz too # $xmlout = ''; $xmlin = q( red green blue light blue light blue light blue light blue ); $xmlout = ''; @opts = ( Record => 'color', NormalizeKeySpace => 1, # ^======= this is the bit we're testing ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( blue green light blue light blue light blue light blue red ), 'And it works for Americanz too'); ############################################################################## # Now try out the KeyFilterSub option. # $xmlout = ''; $xmlin = q( red green orange pink blue ); $xmlout = ''; @opts = ( Record => 'color', KeyFilterSub => sub { map { scalar reverse($_) } @_; }, ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( red orange blue pink green ), 'KeyFilterSub does its job'); ############################################################################## # Now try IgnoreCase, NormaliseKeySpace and KeyFilterSub simultaneously (and # at the same time). # $xmlout = ''; $xmlin = q( RED green light blue LIGHT BLUE orange PINK blue ); $xmlout = ''; @opts = ( Record => 'color', NormaliseKeySpace => 1, IgnoreCase => 1, KeyFilterSub => sub { map { scalar reverse($_) } @_; }, ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( RED orange blue light blue LIGHT BLUE PINK green ), 'IgnoreCase, NormaliseKeySpace & KeyFilterSub play nicely'); ############################################################################## # Slightly unusual version of KeyFilterSub which combine multiple keys # into one. # $xmlout = ''; $xmlin = q( red green orange BLUE RED Green orange blue ); $xmlout = ''; @opts = ( Record => 'color', Keys => '@prime, asc, desc; .', IgnoreCase => 1, KeyFilterSub => sub { sprintf("%02u%s", @_); }, ); push @opts, @main::TempOpts; $sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( orange green Green blue BLUE orange RED red ), 'Synthetic key generation via KeyFilterSub'); ############################################################################## # Test that text content of '0' doesn't give us grief (any more). # $xmlin = q( 0 9 5 0 7 0 ); $xmlout = ''; @opts = (Record => 'item', Keys => '., num, asc'); push @opts, @main::TempOpts; $sorter = Pipeline( XML::Filter::Sort->new(@opts) => \$xmlout ); $sorter->parse_string($xmlin); fix_xml($xmlout); is($xmlout, q( 0 0 5 7 9 0 ), 'No problem with text content of "0" even in sort key'); ############################################################################## # S U B R O U T I N E S ############################################################################## ############################################################################## # Sometimes the output from the SAX pipeline may not be exactly what we're # expecting - for benign reasons. This routine strips the initial XML # declaration which gets added by LibXML but not by other parsers. It also # changes attribute double quotes to single. # sub fix_xml { $_[0] =~ s{^<\?xml\s.*?\?>\s*}{}s; $_[0] =~ s{(\w+)="([^>]*?)"}{$1='$2'}sg; }