# $Id: MultipleFields.pm,v 1.10 2008/07/25 21:42:44 drhyde Exp $ package Sort::MultipleFields; use strict; use warnings; use vars qw($VERSION @EXPORT_OK @ISA); use Scalar::Util qw(reftype); use Exporter; # 5.6's Exporter doesn't export its import function, so # need to do the inheritance dance. Joy. @ISA = qw(Exporter); @EXPORT_OK = qw(mfsort mfsortmaker); $VERSION = '1.0'; =head1 NAME Sort::MultipleFields - Conveniently sort on multiple fields =head1 SYNOPSIS use Sort::MultipleFields qw(mfsort); my $library = mfsort { author => 'ascending', title => 'ascending' } ( { author => 'Hoyle, Fred', title => 'Black Cloud, The' }, { author => 'Clarke, Arthur C', title => 'Rendezvous with Rama' }, { author => 'Clarke, Arthur C', title => 'Islands In The Sky' } ); after which C<$library> would be a reference to a list of three hashrefs, which would be (in order) the data for "Islands In The Sky", "Rendezvous with Rama", and "The Black Cloud". =head1 DESCRIPTION This provides a simple way of sorting structured data with multiple fields. For instance, you might want to sort a list of books first by author and within each author sort by title. =head1 EXPORTS The subroutines may be exported if you wish, but are not exported by default. Default-export is bad and wrong and people who do it should be spanked. =head1 SUBROUTINES =head2 mfsort @sorted = mfsort { SORT SPEC } @unsorted; Takes a sort specification and a list (or list-ref) of references to hashes. It returns either a list or a list-ref, depending on context. The sort specification is a block structured thus: { field1 => 'ascending', field2 => 'descending', field3 => sub { lc($_[0]) cmp lc($_[1]) # case-insensitive ascending }, ... } Yes, it looks like a hash. But it's not, it's a block that returns a list, and order matters. The spec is a list of pairs, each consisting of a field to sort on, and how to sort it. How to sort is simply a function that, when given a pair of pieces of data, will return -1, 0 or 1 depending on whether the first argument is "less than", equal to, or "greater than" the second argument. Sounds familiar, doesn't it. As short-cuts for the most common sorts, the following case-insensitive strings will work: =over =item ascending, or asc Sort ASCIIbetically, ascending (ie C<$a cmp $b>) =item descending, or desc Sort ASCIIbetically, descending (ie C<$b cmp $a>) =item numascending, or numasc Sort numerically, ascending (ie C<$a <=> $b>) =item numdescending, or numdesc Sort numerically, descending (ie C<$b <=> $a>) =back Really old versions of perl might require that you instead pass the sort spec as an anonymous subroutine. mfsort sub { ... }, @list =cut sub mfsort(&@) { my $spec = shift; my @records = @_; @records = @{$records[0]} if(reftype($records[0]) eq 'ARRAY'); (grep { reftype($_) ne 'HASH' } @records) && die(__PACKAGE__."::mfsort: Can only sort hash-refs\n"); my $sortsub = mfsortmaker($spec); @records = sort { $sortsub->($a, $b) } @records; return wantarray() ? @records : \@records; } =head2 mfsortmaker This takes a sort spec subroutine reference like C but returns a reference to a subroutine that you can use with the built-in C function. my $sorter = mfsortmaker(sub { author => 'asc', title => 'asc' }); @sorted = sort $sorter @unsorted; Note that you need to store the generated subroutine in a variable before using it, otherwise the parser gets confused. Using this function to generate functions for C to use should be considered to be experimental, as it can make some versions of perl segfault. It appears to be reliable if you do this: my $sorter = mfsortmaker(...); @sorted = sort { $sorter->($a, $b) } @unsorted; and that's what the C function does internally. =cut sub mfsortmaker { my $spec = shift; my @spec = $spec->(); my $sortsub = sub($$) { 0 }; # default is to not sort at all while(@spec) { # eat this from the end towards the beginning my($spec, $field) = (pop(@spec), pop(@spec)); die(__PACKAGE__."::mfsortmaker: malformed spec after $field\n") unless(defined($spec)); if(!ref($spec)) { # got a string $spec = ($spec =~ /^asc(ending)?$/i) ? sub { $_[0] cmp $_[1] } : ($spec =~ /^desc(ending)?$/i) ? sub { $_[1] cmp $_[0] } : ($spec =~ /^numasc(ending)?$/i) ? sub { $_[0] <=> $_[1] } : ($spec =~ /^numdesc(ending)?$/i) ? sub { $_[1] <=> $_[0] } : die(__PACKAGE__."::mfsortmaker: Unknown shortcut '$spec'\n"); } my $oldsortsub = $sortsub; $sortsub = sub($$) { $spec->($_[0]->{$field}, $_[1]->{$field}) || $oldsortsub->($_[0], $_[1]) } } # extra layer of wrapping seems to prevent segfaults in 5.8.8. WTF? # return $sortsub return sub($$) { # use Data::Dumper;print(map { Dumper($_) } @_);print "\n\n"; $sortsub->(@_) }; } =head1 BUGS, LIMITATIONS and FEEDBACK If you find undocumented bugs please report them either using L or by email. Ideally, I would like to receive sample data and a test file, which fails with the latest version of the module but will pass when I fix the bug. For some unknown reason, passing C a particularly complex subroutine generated using mfsortmaker can sometimes make perl 5.8.8 (and possibly earlier versions) segfault. I *think* I've worked around it, and at least it doesn't happen for me any more, but YMMV. It was something of a Heisenbug so the current fix doesn't fill me with confidence. =cut =head1 SEE ALSO L for sorting data consisting of strings with fixed-length fields in them. =head1 AUTHOR, COPYRIGHT and LICENCE Copyright 2008 David Cantrell Edavid@cantrell.org.ukE This software is free-as-in-speech software, and may be used, distributed, and modified under the terms of either the GNU General Public Licence version 2 or the Artistic Licence. It's up to you which one you use. The full text of the licences can be found in the files GPL2.txt and ARTISTIC.txt, respectively. =head1 CONSPIRACY This module is also free-as-in-mason software. =cut 1;