=head1 NAME

Data::Sorting - Multi-key sort using function results

=head1 SYNOPSIS

  use Data::Sorting qw( :basics :arrays :extras );

  # Sorting functions default to simple string comparisons
  @names = qw( Bob Alice Ellen Charlie David );
  @ordered = sorted_by( undef, @names );

  # Various options can be passed before the list values
  @ordered = sorted_by( [ -order=>'reverse' ], @names );

  # You can also generate a sorting function and then apply it
  $function = sort_function();
  @ordered = $function->( @names ); # or &{$function}(@names)
  @ordered = sort_function( -order=>'reverse' )->( @names );

  # The :arrays functions are prototyped to take the array first
  @ordered = sorted_array( @names );
  @ordered = sorted_arrayref( \@names );

  # You can also sort an array in place, changing its internal order
  sort_array( @names );
  sort_arrayref( \@names );

  # There are several sorting options, such as -compare => 'natural'
  @movies = ( 'The Matrix', 'Plan 9', '2001', 'Terminator 2' );
  @ordered = sort_function( -compare => 'natural' )->( @movies );
  # @ordered now contains '2001', 'The Matrix', 'Plan 9', 'Terminator 2'

  # To sort numbers, pass the -compare => 'numeric' option
  @numbers = ( 18, 5, 23, 42, 156, 91, 64 );
  @ordered = sorted_by( [ -compare => 'numeric' ], @numbers );
  @ordered = sort_function( -compare => 'numeric' )->( @numbers );
  @ordered = sorted_array( @numbers, -compare => 'numeric' );
  sort_array( @numbers, -compare => 'numeric' );

  # You can sort by the results of a function to be called on each item
  sort_array( @numbers, -compare => 'numeric', sub { $_[0] % 16 } );
  # @numbers now contains 64, 18, 5, 23, 42, 91, 156

  # For arrays of data structures, pass in keys to extract for sorting
  @records = (
    { 'rec_id'=>3, 'name'=>{'first'=>'Bob', 'last'=>'Macy'} },
    { 'rec_id'=>1, 'name'=>{'first'=>'Sue', 'last'=>'Jones'} },
    { 'rec_id'=>2, 'name'=>{'first'=>'Al', 'last'=>'Jones' } },
  );
  @ordered = sorted_array( @records, 'rec_id' );

  # For nested data structures, pass an array of keys to fetch
  @ordered = sorted_array( @records, ['name','first'] );

  # Pass multiple sort keys for multiple-level sorts
  @ordered = sorted_array( @records, ['name','last'], ['name','first'] );

  # Any selected sort options are applied to all subsequent sort keys
  @ordered = sorted_array( @records,
                -order => 'reverse', ['name','last'], ['name','first'] );

  # Options specified within a hash-ref apply only to that key
  @ordered = sorted_array( @records,
                { order=>'reverse', sortkey=>['name','last'] },
                ['name','first'] );

  # Locale support is available if you have Perl 5.004 or later and POSIX
  POSIX::setlocale( POSIX::LC_COLLATE(), 'en_US' );
  POSIX::setlocale( POSIX::LC_CTYPE(), 'en_US' );
  @ordered = sorted_array( @records,
                -compare=>'locale', ['name','last'], ['name','first'] );

=head1 ABSTRACT

Data::Sorting provides functions to sort the contents of arrays based on a collection of extraction and comparison rules. Extraction rules are used to identify the attributes of array elements on which the ordering is based; comparison rules specify how those values should be ordered.

Index strings may be used to retrieve values from array elements, or function references may be passed in to call on each element. Comparison rules are provided for numeric, bytewise, and case-insensitive orders, as well as a 'natural' comparison that places numbers first, in numeric order, followed by the remaining items in case-insensitive textual order.

=head1 DESCRIPTION

This module provides several public functions with different calling interfaces that all use the same underlying sorting mechanisms.

These functions may be imported individually or in groups using the following tags:

=over 9

=item :basics

sorted_by(), sort_function(): General-purpose sorting functions.

=item :arrays

sorted_array(), sorted_arrayref(), sort_array(), sort_arrayref(): Prototyped functions for arrays.

=item :extras

sort_key_values(), sort_description(): Two accessory functions that explain how sorting is being carried out.

=back

All of these functions take a list of sorting rules as arguments. See L<"Sort Rule Syntax"> for a discussion of the contents of the $sort_rule or @sort_rules parameters shown below.

=cut

########################################################################

package Data::Sorting;

require 5.003;
use strict;
use Carp;
use Exporter;

use vars qw( $VERSION @ISA %EXPORT_TAGS );
$VERSION = 0.9;

push @ISA, qw( Exporter );
%EXPORT_TAGS = (
  basics => [qw( sorted_by sort_function )],
  arrays => [qw( sorted_array sorted_arrayref sort_array sort_arrayref)],
  extras => [qw( sort_key_values sort_description )],
);
Exporter::export_ok_tags( keys %EXPORT_TAGS );

# Package globals shared between the public entry points and the sorter.
use vars qw( @Array @Rules $PreCalculate $Rule @ValueSet );

########################################################################

=head2 sorted_by

  @ordered = sorted_by( $sort_rule, @value_array );
  @ordered = sorted_by( $sort_rule, @$value_arrayref );
  @ordered = sorted_by( $sort_rule, $value1, $value2, $value3 );

  @ordered = sorted_by( \@sort_rules, @value_array );
  @ordered = sorted_by( \@sort_rules, @$value_arrayref );
  @ordered = sorted_by( \@sort_rules, $value1, $value2, $value3 );

This is a general-purpose sorting function which accepts one or more sort order rules and a list of input values, then returns the values in the order specified by the rules.

The first argument is always treated as the rule specification: pass undef to use the default rules, a single rule, or a reference to an array of rules.

=cut

# @in_order = sorted_by( $sort_rules_ary, @values );
sub sorted_by ($;@) {
  # Always consume the rule argument. Leaving an undef placeholder in @_
  # would alias it into @Array below and sort it along with the values.
  my $rule_arg = shift;
  my @sort_params = ! defined $rule_arg       ? ()
                  : ref($rule_arg) eq 'ARRAY' ? @$rule_arg
                  :                             ( $rule_arg );

  ( my $sorter, local @Rules ) = _parse_sort_args( @sort_params );

  # Expose the remaining arguments (the values) to the sorter as @Array.
  local *Array = \@_;
  &$sorter;
}

########################################################################

=head2 sort_function

  @ordered = sort_function( @sort_rules )->( @value_array );
  @ordered = sort_function( @sort_rules )->( @$value_arrayref );
  @ordered = sort_function( @sort_rules )->( $value1, $value2, $value3 );

Creates an anonymous function which applies the provided sort rules.
The function may be cached and used multiple times to apply the same rules again.

=cut

# @in_order = sort_function( @sort_rules )->( @array );
sub sort_function (@) {
  # Parse the rules once; the returned closure reuses them on every call.
  my ( $sorter, @rules ) = _parse_sort_args( @_ );

  return sub {
    local *Array = \@_;
    local @Rules = @rules;
    my @sorted = &$sorter;

    # Discard any "ext_*" entries left on the rule hashes by this run, so
    # that the cached rules start clean the next time the closure is used.
    foreach my $rule ( @rules ) {
      delete @{ $rule }{ grep { /^ext_/ } keys %$rule };
    }

    @sorted;
  };
}

########################################################################

=head2 sorted_array

  @ordered = sorted_array( @value_array, @sort_rules );
  @ordered = sorted_array( @$value_arrayref, @sort_rules );

Returns a sorted list of the items without altering the order of the original list.

=cut

# @in_order = sorted_array( @array, @sort_rules );
sub sorted_array (\@;@) {
  # The prototype hands us the array by reference; alias @Array to it.
  local *Array = shift;
  my ( $sorter, @parsed_rules ) = _parse_sort_args( @_ );
  local @Rules = @parsed_rules;
  return &$sorter;
}

=head2 sorted_arrayref

  @ordered = sorted_arrayref( \@value_array, @sort_rules );
  @ordered = sorted_arrayref( $value_arrayref, @sort_rules );

Returns a sorted list of the items without altering the order of the original list.

=cut

# @in_order = sorted_arrayref( $array_ref, @sort_rules );
sub sorted_arrayref ($;@) {
  # Same as sorted_array, but the caller supplies the reference explicitly.
  local *Array = shift;
  my ( $sorter, @parsed_rules ) = _parse_sort_args( @_ );
  local @Rules = @parsed_rules;
  return &$sorter;
}

########################################################################

=head2 sort_array

  sort_array( @value_array, @sort_rules );
  sort_array( @$value_arrayref, @sort_rules );

Sorts the contents of the specified array using a list of sorting rules.

=cut

# sort_array( @array, @sort_rules );
sub sort_array (\@;@) {
  local *Array = shift;
  my ( $sorter, @parsed_rules ) = _parse_sort_args( @_ );
  local @Rules = @parsed_rules;

  # @Array aliases the caller's array, so this assignment sorts it in place.
  @Array = &$sorter;
}

=head2 sort_arrayref

  sort_arrayref( \@value_array, @sort_rules );
  sort_arrayref( $value_arrayref, @sort_rules );

Equivalent to sort_array, but takes an explicit array reference as its first argument, rather than an array variable.
=cut

# sort_arrayref( $array_ref, @sort_rules );
sub sort_arrayref ($;@) {
  local *Array = shift;
  ( my $sorter, local @Rules ) = _parse_sort_args( @_ );

  # @Array aliases the referenced array, so this assignment sorts it in place.
  @Array = &$sorter;
}

########################################################################

=head2 sort_key_values

  @key_values = sort_key_values( \@value_array, @sort_rules );
  @key_values = sort_key_values( $value_arrayref, @sort_rules );

Doesn't actually perform any sorting. Extracts and returns the values which would be used as sort keys from each item in the array, in their original order.

=cut

# @results = sort_key_values( $array, @sort_rules );
sub sort_key_values ($;@) {
  local *Array = shift;
  my ($sorter, @rules) = _parse_sort_args( @_ );

  if ( scalar @rules == 1 ) {
    # A single rule: hand the whole array to the per-rule extractor.
    _extract_values_for_rule( $rules[0], @Array );
  } else {
    # Several rules: one array reference of extracted values per item.
    map [ _extract_values_for_item( $_, @rules ) ], @Array;
  }
}

########################################################################

=head2 sort_description

  @description = sort_description( $descriptor, @sort_rules );

Doesn't actually perform any sorting. Provides descriptive information about the sort rules for diagnostic purposes.

The $descriptor may be a false value to select the default text description, a code reference to be called once with each parsed rule, or a string naming a C<_desc_I<name>> function in this package. An unknown name or an unsupported reference type is a fatal error.

=cut

# @sort_rules = sort_description( 'text', @sort_rules );
sub sort_description ($;@) {
  my $descriptor = shift;

  my $desc_func;
  if ( ! $descriptor ) {
    $desc_func = \&_desc_text;
  } elsif ( ref($descriptor) eq 'CODE' ) {
    $desc_func = $descriptor;
  } elsif ( ! ref($descriptor) ) {
    no strict 'refs';
    # Taking \&{"name"} always produces a true reference, even for a
    # subroutine that does not exist, so test for a real definition first.
    defined &{"_desc_$descriptor"}
      or croak("Can't find a function named '_desc_$descriptor'");
    $desc_func = \&{"_desc_$descriptor"};
  } else {
    croak("Unsupported descriptor '$descriptor'")
  }

  my ($sorter, @rules) = _parse_sort_args( @_ );

  map { &$desc_func( $_ ) } @rules;
}

# $text = _desc_text( $rule );
# Builds a one-line description of a parsed rule hash from its extract,
# extract_args, order_sign and compare entries.
sub _desc_text {
  my $rule = shift;
  my $comp = $rule->{compare};

  # Quoted, comma-separated extraction arguments; parenthesized when present.
  my $args = join( ', ', map "'$_'", @{ $rule->{extract_args} } );
  my $arg_text = $args ? "($args) " : " ";

  my $direction = $rule->{order_sign} < 0 ? "descending" : "ascending";

  my $comp_text = ! ref($comp)           ? "$comp"
                : ref($comp) eq 'CODE'   ? "with custom function ($comp)"
                : ref($comp) eq 'ARRAY'  ? join(', ', @$comp)
                :                          "with $comp";

  return $rule->{extract} . $arg_text . "in " . $direction . " " .
         $comp_text . " order";
}

########################################################################

=head2 Sort Rule Syntax

The sort rule argument list may contain several different types of parameters, which are parsed identically by all of the public functions described above.

A sort rule definition list may contain any combination of the following argument structures:

=over 4

=item I<nothing>

If no sort keys are specified, a default sort key is created using the C<extract =E<gt> "self"> option.

  @ordered = sorted_array( @names );

=item I<sortkey>

Specifies a sort key. Each I<sortkey> may be either a scalar value, or an array reference. Appropriate values for a I<sortkey> vary depending on which "extract" option is being used, and are discussed further below.

  @ordered = sorted_array( @numbers, sub { $_[0] % 8 } );
  @ordered = sorted_array( @records, 'rec_id' );
  @ordered = sorted_array( @records, ['name','first'] );

Any number of sortkeys may be provided:

  @ordered = sorted_array( @records, ['name','last'],
                                     ['name','first'] );

=item -sortkey => I<sortkey>

Another way of specifying a sort key is by preceding it with the "-sortkey" flag.

  @ordered = sorted_array( @numbers, -sortkey => sub { $_[0] % 8 } );
  @ordered = sorted_array( @records, -sortkey => ['name','last'],
                                     -sortkey => ['name','first'] );

=item { sortkey => I, I