package Data::Pivoter;
use strict;
use vars qw($VERSION);
$VERSION='0.08';
=head1 NAME
Data::Pivoter - Perl extension for pivot / cross tabulation of data
=head1 SYNOPSIS
$pivoter = Table::Pivoter->new(col=>
, row=> ,
data=> ,
group=> ,
function=> ,
numeric => ,
donotvalidate=> ,
test=>);
$pivotedtableref = $pivoter->pivot(\@rawtable)
if $pivoter->ok;
=head1 DESCRIPTION
A pivot object is created using new. Various parameters may be specified to
alter how the table is pivoted. The actual pivot of a table is perfomed using the method pivot.
=cut
use vars '$AUTOLOAD';
use Data::Dumper;
use Carp;
my $debug = $ENV{PIVOTER_DEBUG} || 0;
=head1 Methods
=head2 new
Table::Pivoter->new(col=>, row=>, data=>,
group=>, function=>, numeric=>,
donotvalidate=>,
test=>boolean);
Creates a new pivoter object where is the column containing data going
to be the column headings, is the column going to row headings and
is the data column. is a column used for higher level grouping,
i.e. splitting the data into different tables.
{numerical} is used to flag that the data are numerical so that the correct
sorting function is being used.
{function} is a function to
compile the data set for each row/col combination (Still not implemented) If
no function is given, the last value for each data point is returned.
The
inputdata to new are validated to check that row,col, and data are defined and
that row and col differs. If this behaviour for some reason not is wanted,
donotvalidate can be set to a true value. The property test may be set to avoid
output from the validation (esp for the internal testing). To check for a well-
defined pivoter object, call the method ok.
Planned features (except for implementing the compilation function) includes to
add customizable sorting functions for rows and columns.
=cut
sub _validate{
# Checks if a pivoter object is well-defined
my $self=shift;
# col, row and data must be defined
my $validated =
defined $self->{_colhead} &&
defined $self->{_rowhead} &&
defined $self->{_data};
# If all are defined, must check that row and column are different rows
$validated = not($self->{_colhead} == $self->{_rowhead})
if $validated;
local $^W=0;
carp ("Definition error:
Col = $self->{_colhead}
Row = $self->{_rowhead}
Data= $self->{_data}\n") unless $validated || $self->{_testing};
return $validated;
}
sub _keysort{
my $self = shift;
my $href = shift;
my $i = shift;
my $sortfunc=$i eq 'C'?$self->{_sortfunccol}:$self->{_sortfuncrow};
$i=0;
foreach my $key (sort {&$sortfunc} keys %$href){
$href->{$key}=++$i if defined $key;
print "Key: $key [$i]\n" if $debug > 2;
}
}
sub new{
my $class = shift;
my %para=@_;
print "[C,R,D,G]:$para{col},$para{row},$para{data},$para{group}\n" if $debug;
print "Don't validate\n" if $debug and $para{donotvalidate};
print "Function: $para{function}\n" if $debug and $para{function};
my $self = {
_colhead => $para{col},
_rowhead => $para{row},
_data => $para{data},
_function=> $para{function},
_group => $para{group},
_donotvalidate =>$para{donotvalidate},
_numeric => $para{numeric},
_testing => $para{test}
};
print Dumper(\$self) if $debug>9;
print "New[R,C] : $self->{_rowhead},$self->{_colhead}\n" if $debug >3;
carp("Sorry, functions are still not working in Data::Pivoter...\n")
if $self->{_function};
{
local $^W=0; # Turns of warnings to avoid lots of
# "Use of uninitialized value in pattern match"
if ($self->{_numeric}=~/C/i){
$self->{_sortfunccol}= sub {$a <=> $b} }
else{
$self->{_sortfunccol}= sub {$a cmp $b} };
if ($self->{_numeric}=~/R/i){
$self->{_sortfuncrow}= sub {$a <=> $b} }
else{
$self->{_sortfuncrow}= sub {$a cmp $b} };
}
bless $self,$class;
$self->{_OK}=$self->{_donotvalidate} || $self->_validate ;
return $self;
}
=head2 pivot
@pivotedtable = $pivoter->pivot (@rawtable);
The pivoter method actually performs the pivot with the parameters given in new
and returns the pivoted table.
=cut
sub pivot{
my $self = shift;
my($table,$rows,$r,$c,$g,%rkeys,%ckeys,%gkeys,%hashtable,@pivot, @table);
@table = @{ shift() }; # Throws in a ref, needs the table
print "Pivot[R,C]: $self->{_rowhead},$self->{_colhead}\n" if $debug > 3;
for ($rows = 0;$rows < @table;$rows++){
print "[\$rows: $rows]Pivot[R,C]: $self->{_rowhead},$self->{_colhead}\n"
if $debug > 3;
print "row :>$table[$rows][$self->{_rowhead}]<\n" if $debug > 3;
print "col :>$table[$rows][$self->{_colhead}]<\n" if $debug > 3;
my $row = $table[$rows][$self->{_rowhead}];
my $col = $table[$rows][$self->{_colhead}];
my $group;
# Collects the unique row, col and group values
$rkeys{$row}=++$r unless $rkeys{$row};
$ckeys{$col}=++$c unless $ckeys{$col};
if ($self->{_group}){
$group = $table[$rows][$self->{_group}];
$gkeys{$group}=++$g unless $gkeys{$group};
}
my $ref; # Referres to the element in the pivot hash
if (defined $group){
$ref=\$hashtable{$row}{$col}{$group}
}else{
$ref=\$hashtable{$row}{$col}
}
unless ($self->{_function}){
# No function is defined, just picks up the value
$$ref=$table[$rows][$self->{_data}];
}else{
push @$ref, \$table[$rows][$self->{_data}];
# Treats the $ref as an array reference and
# collects the data into that array to use the given function on them
# after all the data have been collected.
}
}
# Preparing the correct sorting of the data
$self->_keysort(\%rkeys,'R');
$self->_keysort(\%ckeys,'C');
# [0][0] is always undef
$c=1; # Puts in the row headers in the pivottable:
foreach my $colkey (sort {&{$self->{_sortfunccol}}} keys %ckeys){
$pivot[0][$c++] = $colkey;
}
# The row and col headers are in the first column and row
foreach my $rowkey (sort {&{$self->{_sortfuncrow}}} keys %rkeys){
# Puts in the col headers:
$pivot[$rkeys{$rowkey}][0] = $rowkey;
foreach my $colkey (sort {&{$self->{_sortfunccol}}} keys %ckeys){
# foreach my $colkey (sort {&{$self->{_sortfunccol}}} keys %ckeys){
# Puts in the values in the finished table:
$pivot[$rkeys{$rowkey}][$ckeys{$colkey}] = $hashtable{$rowkey}{$colkey};
}
}
print '@pivot : ',Dumper(\@pivot) if $debug > 5;
if ($self->{_function}){
for ($r=1,@pivot,$r++){
my $warn = $^W;
$^W=undef;
my $row=$pivot[$r];
for ($c=1,@{$row},$c++){
print "[$r,$c] @{$pivot[$r][$c]}" if $debug > 2;
# eval{$pivot[$r][$c]= eval{$self->{_function}(@{$pivot[$r][$c]})}};
eval{${$pivot[$r][$c]}=$self->{_function}};
}
$^W=$warn;
}
print "\n" if $debug >2;
}
return \@pivot;
}
=head2 ok
The method may be called to see if the pivoter object is well-defined. If donotvalidate is set, then this method will always return true.
=cut
sub ok{
my $self=shift;
return $self->{_OK}
}
=head3 New algorithms
A possible enhancement is to use two different types of functions for
compilation, one which needs all the data avaliable to perform the calculation,
another that can can be applied to the data before all the datapoints are
known, (e.g. to return the max value from the data set) to avoid going through
the data set twice when possible
=cut
=head1 System variables
The variable PIVOTER_DEBUG may be set to get debugging output. A higher numerical
value gives more output.
=cut
=head1 Licencing
This module is distributed under the artistic licence, i.e. the same licence at Perl itself.
=cut
=head1 AUTHOR
Morten A.K. Sickel, Morten.Sickel@newmedia.no
=head1 SEE ALSO
perl(1).
=cut
1;