# $Id: 4_workout.t,v 1.3 2005/04/20 20:03:53 grantm Exp $
use strict;
use Test::More;
BEGIN { # Seems to be required by older Perls
unless(eval { require XML::SAX::Writer }) {
plan skip_all => 'XML::SAX::Writer not installed';
}
unless(eval { require XML::SAX::ParserFactory }) {
plan skip_all => 'XML::SAX::ParserFactory not installed';
}
unless(eval { require XML::SAX::Machines }) {
plan skip_all => 'XML::SAX::Machines not installed';
}
}
plan tests => 26;
use XML::Filter::Sort;
use XML::SAX::Machines qw( :all );
$^W = 1;
my(@opts, $xmlin, $xmlout, $sorter);
##############################################################################
# Global used to flag disk rather than memory buffering
#
@main::TempOpts = () unless(@main::TempOpts);
##############################################################################
# Sort using full text content as key (including leading digits)
#
$xmlin = q(
1Zebedee
2Yorick
3Wayne
4Xavier
);
$xmlout = '';
@opts = (Record => 'person');
push @opts, @main::TempOpts;
$sorter = Pipeline(
XML::Filter::Sort->new(@opts) => \$xmlout
);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, $xmlin, 'Default key to full text content, alpha, asc');
##############################################################################
# Sort using text content of specified child element as a key
#
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
3Wayne
4Xavier
2Yorick
1Zebedee
), 'Parsed key from string and extracted element content');
##############################################################################
# Check that a 'foreign' element in the middle of a sequence of records
# causes the records before and the records after to be sorted as two
# independent lists.
#
$xmlin = q(
1Zebedee
2Yorick
popcorn
3Wayne
4Xavier
0
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
2Yorick
1Zebedee
popcorn
3Wayne
4Xavier
0
), 'Sorted two independent lists (element between)');
##############################################################################
# Check that non-whitespace text causes the same effect.
#
$xmlin = q(
1Zebedee
2Yorick
popcorn
3Wayne
4Xavier
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
2Yorick
1Zebedee
popcorn
3Wayne
4Xavier
), 'Sorted two independent lists (text between - easy case)');
##############################################################################
# Repeat that last test with slightly different input data to expose a flaw
# which probably ought to be fixed.
#
$xmlin = q(
1Zebedee
2Yorick
popcorn
4Xavier
3Wayne
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
TODO: { local $TODO = 'Trailing whitespace on leading text not quite done';
is($xmlout, q(
2Yorick
1Zebedee
popcorn
3Wayne
4Xavier
), 'Sorted two independent lists (text between - pathological case)');
}
##############################################################################
# Now do a similar test with a comment separating the two record lists.
#
$xmlin = q(
1Zebedee
2Yorick
4Xavier
3Wayne
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
2Yorick
1Zebedee
3Wayne
4Xavier
), 'Sorted two independent lists (comment between)');
##############################################################################
# Same again but with a processing instruction separating the two record lists.
#
$xmlin = q(
1Zebedee
2Yorick
4Xavier
3Wayne
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
2Yorick
1Zebedee
3Wayne
4Xavier
), 'Sorted two independent lists (PI between)');
##############################################################################
# Check that as each record is buffered, reordered and spat back out, it
# retains its own leading whitespace.
#
$xmlin = q(
Zebedee
Yorick
Wayne
Xavier
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
Wayne
Xavier
Yorick
Zebedee
), 'Funky indentation preserved');
##############################################################################
# Throw a namespace definition into the mix and confirm it is ignored.
#
$xmlin = q(
1Zebedee
2Yorick
3Wayne
4Xavier
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
3Wayne
4Xavier
2Yorick
1Zebedee
), 'Record selection with optional namespace works');
##############################################################################
# Now sort only the records with no namespace
#
$xmlin = q(
1Zebedee
2Yorick
3Wayne
4Xavier
5Vernon
6Trevor
7Ulbrecht
);
$xmlout = '';
@opts = (
Record => '{}person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
1Zebedee
3Wayne
2Yorick
4Xavier
6Trevor
7Ulbrecht
5Vernon
), 'Record selection with no namespace works');
##############################################################################
# Now sort only the records with specified namespace
#
$xmlin = q(
1Zebedee
2Yorick
3Wayne
4Xavier
5Vernon
6Trevor
7Ulbrecht
);
$xmlout = '';
@opts = (
Record => '{bob.com}person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
3Wayne
2Yorick
1Zebedee
4Xavier
5Vernon
6Trevor
7Ulbrecht
), 'Record selection with specific namespace works');
##############################################################################
# Put some comments into the mix and confirm they are handled correctly.
#
$xmlin = q(
Zebedee
Xavier
Yorick
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
Xavier
Yorick
Zebedee
), 'Buffering of comments works');
##############################################################################
# Do the same with processing instructions.
#
$xmlin = q(
Zebedee
Xavier
Yorick
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => 'firstname',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
Xavier
Yorick
Zebedee
), 'Buffering of PIs works');
##############################################################################
# Run a multi-key sort - two alpha keys.
#
$xmlin = q(
This is a list of names & ages
Zebedee
Boozle
Yorick
Cabbage
Yorick
Cabbage
Xavier
Aardvark
);
$xmlout = '';
@opts = (
Record => 'person',
Keys => '
lastname
firstname
@age
',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
This is a list of names & ages
Xavier
Aardvark
Zebedee
Boozle
Yorick
Cabbage
Yorick
Cabbage
), 'Multi-element records and multi-key sort OK');
##############################################################################
# Introduce a third sort key - numeric.
#
$xmlout = '';
@opts = (
Record => 'person',
Keys => '
lastname, alpha, asc
firstname, alpha, asc
@age, num, asc
',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
This is a list of names & ages
Xavier
Aardvark
Zebedee
Boozle
Yorick
Cabbage
Yorick
Cabbage
), 'Numeric sort key OK');
##############################################################################
# Check that descending order works for both alpha and numeric sorts
#
$xmlout = '';
@opts = (
Record => 'person',
Keys => '
firstname, alpha, desc
lastname, alpha, asc
@age, num, desc
',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
This is a list of names & ages
Zebedee
Boozle
Yorick
Cabbage
Yorick
Cabbage
Xavier
Aardvark
), 'Descending order OK');
##############################################################################
# Use a code reference rather than alpha or numeric comparator
#
$xmlin = q(
QX54763
AS87645
YT19895
RS04198
);
$xmlout = '';
@opts = (
Record => 'part',
Keys => [
[ '.' => sub {
my @nums = map { /(\d+)/ } @_;
$nums[0] <=> $nums[1];
} ]
]
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
RS04198
YT19895
QX54763
AS87645
), 'Coderef comparator OK');
##############################################################################
# Test that by default case of keys is significant
#
$xmlin = q(
red
Green
blue
);
$xmlout = '';
@opts = (
Record => 'colour',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
Green
blue
red
), 'Case is significant by default');
##############################################################################
# But the IgnoreCase option fixes that
#
$xmlout = '';
@opts = (
Record => 'colour',
IgnoreCase => 1,
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
blue
Green
red
), 'IgnoreCase makes case insignificant');
##############################################################################
# Test that by default space in keys is significant
#
$xmlin = q(
red
green
blue
light blue
light blue
light blue
light blue
);
$xmlout = '';
@opts = (
Record => 'colour',
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
blue
light blue
light blue
light blue
red
green
light blue
), 'Space is significant by default');
##############################################################################
# But the NormaliseKeySpace option fixes that
#
$xmlout = '';
@opts = (
Record => 'colour',
NormaliseKeySpace => 1,
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
blue
green
light blue
light blue
light blue
light blue
red
), 'NormaliseKeySpace makes spaces insignificant');
##############################################################################
# And it fixes it for Americanz too
#
$xmlout = '';
$xmlin = q(
red
green
blue
light blue
light blue
light blue
light blue
);
$xmlout = '';
@opts = (
Record => 'color',
NormalizeKeySpace => 1,
# ^======= this is the bit we're testing
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
blue
green
light blue
light blue
light blue
light blue
red
), 'And it works for Americanz too');
##############################################################################
# Now try out the KeyFilterSub option.
#
$xmlout = '';
$xmlin = q(
red
green
orange
pink
blue
);
$xmlout = '';
@opts = (
Record => 'color',
KeyFilterSub => sub { map { scalar reverse($_) } @_; },
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
red
orange
blue
pink
green
), 'KeyFilterSub does its job');
##############################################################################
# Now try IgnoreCase, NormaliseKeySpace and KeyFilterSub simultaneously (and
# at the same time).
#
$xmlout = '';
$xmlin = q(
RED
green
light blue
LIGHT BLUE
orange
PINK
blue
);
$xmlout = '';
@opts = (
Record => 'color',
NormaliseKeySpace => 1,
IgnoreCase => 1,
KeyFilterSub => sub { map { scalar reverse($_) } @_; },
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
RED
orange
blue
light blue
LIGHT BLUE
PINK
green
), 'IgnoreCase, NormaliseKeySpace & KeyFilterSub play nicely');
##############################################################################
# Slightly unusual version of KeyFilterSub which combine multiple keys
# into one.
#
$xmlout = '';
$xmlin = q(
red
green
orange
BLUE
RED
Green
orange
blue
);
$xmlout = '';
@opts = (
Record => 'color',
Keys => '@prime, asc, desc; .',
IgnoreCase => 1,
KeyFilterSub => sub { sprintf("%02u%s", @_); },
);
push @opts, @main::TempOpts;
$sorter = Pipeline(XML::Filter::Sort->new(@opts) => \$xmlout);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
orange
green
Green
blue
BLUE
orange
RED
red
), 'Synthetic key generation via KeyFilterSub');
##############################################################################
# Test that text content of '0' doesn't give us grief (any more).
#
$xmlin = q(
0
- 9
- 5
- 0
- 7
0
);
$xmlout = '';
@opts = (Record => 'item', Keys => '., num, asc');
push @opts, @main::TempOpts;
$sorter = Pipeline(
XML::Filter::Sort->new(@opts) => \$xmlout
);
$sorter->parse_string($xmlin);
fix_xml($xmlout);
is($xmlout, q(
0
- 0
- 5
- 7
- 9
0
), 'No problem with text content of "0" even in sort key');
##############################################################################
# S U B R O U T I N E S
##############################################################################
##############################################################################
# Sometimes the output from the SAX pipeline may not be exactly what we're
# expecting - for benign reasons. This routine strips the initial XML
# declaration which gets added by LibXML but not by other parsers. It also
# changes attribute double quotes to single.
#
sub fix_xml {
$_[0] =~ s{^<\?xml\s.*?\?>\s*}{}s;
$_[0] =~ s{(\w+)="([^>]*?)"}{$1='$2'}sg;
}