# $Id: Dumper.pm 456 2009-04-15 12:20:59Z fil $ package Data::Tabular::Dumper; use strict; use vars qw( $VERSION @ISA @EXPORT_OK ); use Carp; $VERSION="0.08"; require Exporter; @ISA = qw( Exporter ); @EXPORT_OK = qw( Dump ); ########################################################### sub open { my($package, %writers)=@_; my $self=bless {writers=>{}, fields=>[]}, $package; $self->{master_key} = delete $writers{master_key}; $self->{master_key} = '' unless defined $self->{master_key}; my($object, $one); WRITER: foreach my $p1 (keys %writers) { foreach my $p2 ($p1, __PACKAGE__.'::'.$p1) { if($p2->can('open') and $p2->can('close') and $p2->can('write')) { $package=$p2 ; eval { $object=$package->open($writers{$p1}); }; carp $@ if $@; if($object) { $self->{writers}{$package}=$object; $one=1; } next WRITER; } } carp "Could not find a valid package for $p1 (".__PACKAGE__."::$p1)"; } return unless $one; return $self; } ########################################################### sub master_key { my( $self, $new_master ) = @_; my $ret = $self->{master_key}; $self->{master_key} = $new_master if 2 == @_; return $ret; } ########################################################### # Perform $name->() on all the writers. sub _doall { my($name)=@_; return sub { my $self=shift @_; my $n; foreach my $o (values %{$self->{writers}}) { my $code=$o->can($name); if($code) { $code->($o, @_); $n++ unless $@; } else { carp "Object $o can not do $name"; } carp $@ if $@; } return $n; }; } ########################################################### *fields=_doall('fields'); *write=_doall('write'); *page_start=_doall('page_start'); *page_end=_doall('page_end'); ########################################################### sub close { my( $self )= @_; my @ret; foreach my $o ( values %{$self->{writers}} ) { next unless $o->can( 'close' ); push @ret, $o->close(); } return @ret; } ########################################################### sub DESTROY { $_[0]->close; } ########################################################### sub available { my($package)=@_; my(%res, $yes); foreach my $p (qw(CSV XML Excel)) { $yes=0; $yes=1 if exists $INC{"Data/Tabular/Dumper/$p.pm"}; unless($yes) { local $SIG{__DIE__}='DEFAULT'; local $SIG{__WARN__}='IGNORE'; $yes=eval "require Data::Tabular::Dumper::$p; 1;"; # warn $@ if $@ and $ENV{PERL_DL_NONLAZY}; }; $res{$p}=$yes; } return \%res unless wantarray; return grep {$res{$_}} keys %res; } ########################################################### sub Dump { return __PACKAGE__->dump( @_ ); } ########################################################### sub dump { my( $self, $data ) = @_; my $ret; unless( ref $self ) { require Data::Tabular::Dumper::String; $self = $self->open( String => \$ret, master_key=>'KEY' ); } my $state = $self->analyse( $data ); unless( $state->{pages} ) { $self->__dump( $state ); } my $q1=1; foreach my $p ( @{ $state->{pages} } ) { my $name = "Page $q1"; $q1++; $name = $p->{name} if exists $p->{name}; $self->page_start( $name ); $self->__dump( $p ); $self->page_end( $name ); } return $ret; } ########################################################### sub __dump { my( $self, $data ) = @_; $self->fields( $data->{fields} ) if $data->{fields}; foreach my $d ( @{ $data->{data} } ) { $self->write( $d->{data} ); } } ########################################################### # Convert a 2- or 3-dimensional data structure into something we can # easily use. # Lowest-level structure is {data=>[ ...scalars...], fields=>[ ...names...]} # Other possible : maxdepth, depth (internal use) # We either have an array of those in {data} (2-D) # { data=>[ ...lower-level ], fields=>[....names...] } # Otehr possible keys : name (if it's a part of {pages}) # OR we have an array of 2-D structures in {pages} sub analyse { my( $self, $data ) = @_; my $master = { maxdepth=>0, depth=>0 }; my $state = $self->__analyse( $master, $data); if( $master->{maxdepth} == 4 ) { $state->{pages} = delete $state->{data}; } die "ARG!" if $master->{__fields}; return $state; } ########################################################### # Do the heavy lifting. # Recurse over a data structure sub __analyse { my( $self, $parent, $data ) = @_; my $r = ref $data; return $data unless $r; die "Only 2-d and 3-d data is supported" if $parent->{depth} > 2; my $state = { depth=>$parent->{depth}+1 }; $state->{maxdepth} = $state->{depth}; if( $r eq 'ARRAY' ) { $self->__analyse_array( $parent, $data, $state ); } elsif( $r eq 'HASH' ) { $self->__analyse_hash( $parent, $data, $state ); } else { die "Don't know how to handle $r at level $state->{depth}"; } $self->__analyse_rehash( $state, $data, $parent ) if $state->{__fields}; $self->__analyse_depth( $state, $parent ); return $state; } ########################################################### # Turns out $data was a HoH or LoH. So we have to change all the # sub-hashes. sub __analyse_rehash { my( $self, $state, $data, $parent ) = @_; ## If we are here, $data is a LoH... my @fields = sort keys %{ delete $state->{__fields} }; # use Data::Denter; # warn "Rehashing ", Denter $data, $state->{data}; my $first_name; unless( 'ARRAY' eq ref $data ) { my @names; if( $state->{data}[0]{name} ) { # 3-D @names = map { $_->{name} } @{ $state->{data} }; } else { # HoH @names = map { $_->{data}[0] } @{ $state->{data} }; } $data = [ map { { %{$data->{$_}} } } @names ]; $first_name = 1; unshift @fields, 'HONK__TITLE__HONK'; for( my $q=0; $q <= $#$data ; $q++ ) { $data->[$q]{$fields[0]} = $names[$q]; } } $state->{data} = []; foreach my $hash ( @$data ) { push @{ $state->{data} }, { depth=>$parent->{depth}+2, data=>[ @{$hash}{@fields} ] }; } $fields[0] = $self->{master_key} if $first_name; $state->{fields} = \@fields; return; } ########################################################### # Make sure {maxdepth} of the parent is as big as can be sub __analyse_depth { my( $self, $state, $parent ) = @_; if( $state->{depth} > $parent->{maxdepth} ) { $parent->{maxdepth} = $state->{depth}; } if( $state->{maxdepth} > $parent->{maxdepth} ) { $parent->{maxdepth} = $state->{maxdepth}; } } ########################################################### # Recurse over an arrayref sub __analyse_array { my( $self, $parent, $data, $state ) = @_; $state->{data} = []; foreach my $s ( @$data ) { my $sub = $self->__analyse( $state, $s ); if( @{ $state->{data} } ) { my $err = (!!ref $state->{data}[0] ^ !!ref $sub); $err = 1 if not $err and ref $state->{data}[0] and ref $state->{data}[0]{data} ne ref $sub->{data}; # $err = 1 if $state->{fields}; if( $err ) { die "Non-uniform data references at a level $state->{depth}"; } } elsif( ref $sub ) { $parent->{maxdepth}++; } push @{ $state->{data} }, $sub } } ########################################################### # Recurse over a hashref sub __analyse_hash { my( $self, $parent, $data, $state ) = @_; $state->{data} = []; if( $parent->{fields} ) { foreach my $k ( @{ $parent->{fields} } ) { push @{ $state->{data} }, $data->{$k}; } return; } foreach my $k ( sort keys %$data ) { my $sub = $self->__analyse( $state, $data->{$k} ); unless( ref $sub ) { # $data is a hash $parent->{__fields}{$k} = 1; # $fields{$k}=1; } elsif( $sub->{maxdepth}==3 ) { # $data is a HoLoL $sub->{name} = $k; } else { my $r = ref $sub->{data}; if( $r eq 'ARRAY' ) { # $data is a HoL unshift @{$sub->{data}}, $k; } elsif( $r eq 'HASH' ) { # $data is a HoH $sub->{name}=$k; } } if( 0== @{ $state->{data} } and ref $sub ) { $parent->{maxdepth}++; } push @{$state->{data}}, $sub; } } 1; __END__ =head1 NAME Data::Tabular::Dumper - Seamlessly dump tabular data to XML, CSV and XLS. =head1 SYNOPSIS use Data::Tabular::Dumper; $date=strftime('%Y%m%d', localtime); my $dumper = Data::Tabular::Dumper->open( XML => [ "$date.xml", "data" ], CSV => [ "$date.csv", {} ], Excel => [ "$date.xls" ] ); # $data is a 2-d or 3-d data structure $data = { '0-monday' => { hits=>30, misses=>5, GPA=>0.42 }, '1-tuesday' => { hits=>17, misses=>3, GPA=>0.17 }, }; $dumper->dump( $data ); ## If you want more control : $dumper->page_start( "My Page" ); # what each field is called $dumper->fields([qw(uri hits bytes)]); # now output the data foreach my $day (@$month) { $dumper->write($day); } $dumper->page_end( "My Page" ); # sane shutdown $dumper->close(); This would produce the following XML : /index.html 4000 5123412 /something/index.html 400 51234 =head1 DESCRIPTION Data::Tabular::Dumper aims to make it easy to turn tabular data into as many file formats as possible. This is useful when you need to provide data that folks will then process further. Because you don't really know what format they want to use, you can provide as many as possible, and let them choose which they want. Tabular data means data that has 2 dimensions, like a list of lists, a hash of lists, a list of hashes or a hash of hashes. You may also dump 3 dimentional data; in this case, each of the top-level elements are called B and each sub-element is independent. While it might seem desirable to give an example for each data type, this would be onerous to maintain. Please look at the tests to see what a given data object yields. =head1 2 DIMENSIONAL DATA =head2 List of lists Simplest type of data; each of the sub-lists is output as-is. For XML, the lowest elements are number 0, 1, etc. =head2 Hash of lists Each of the sub-lists is output prefixed with the key name. For XML, the lowest elements are number 0, 1, etc, with 0 being the key. =head2 List of hashes The bottom hashes keyed records, column names are hash keys, column values are hash values. Obviously, the list of column names has to be the same for all records, so all the keys in all the hashes are used. If a given hash doesn't have a key, it will be blank in the output at that position. [ { camera=>"EOS 2000", price=>12000.00 }, { camera=>"FinePix 1300", price=>150 }, ] This corresponds to the following table: camera price EOS 2000 12000.00 FinePix 1300 150.00 Note that keys are asciibetically sorted. =head2 Hash of hashes Similar to C, except the first column is the key in the top hash. For XML the key is used instead of C, unless you are using C (see L). Keys are asciibetically sorted. Example : { monday => { honk => 42, bonk=>17 }, wednesday => { honk => 12, blurf=>36 } } CSV and Excel would look like: ,blurf,bonk,honk monday,,17,42 wednesday,36,12 The XML would look like: 17 42 36 12 =head1 3 DIMENSIONAL DATA =head2 List of 2D data Each element in the top list is a page. Pages are named I, I and so on. Each 2D element is treated seperately as above. =head2 Hash of lists of lists =head2 Hash of lists of hashes =head2 Hash of hashes of hashes Each value in the top hash is a page. Pages are named by their keys. Each 2D element is treated seperately as above, as if you were doing: foreach my $key ( sort keys %$HoX ) { $dumper->page_start( $key ); $dumper->dump( $HoX->{$key} ); $dumper->page_send( $key ); } =head2 Hash of hashes of lists B =head1 FUNCTIONS =head2 Dump( $data ) Calls C as a package method. In other words, it does the following: Data::Tabular::Dumper->dump( $data ); =head1 Data::Tabular::Dumper METHODS =head2 open(%writers) Creates the Data::Tabular::Dumper object. C<%writers> is a hash that contains the the package of the object (as keys) and the parameters for it's C function (as values). As a convienience, the Data::Tabular::Dumper::* modules can be specified as XML, Excel or CSV. The example in the L would create 3 objects, via the following calls : $obj0 = Data::Tabular::Dumper::XML->open( ["$date.xml","users", "user"] ); $obj1 = Data::Tabular::Dumper::Excel->open( ["$date.xls"] ); $obj2 = Data::Tabular::Dumper::CSV->open( ["$date.xls", {}] ); Note that you must load a given package first. Copen> will not do so for you. You may also create your own packages. See WRITER OBJECTS below. There is one special key in C<%writers> : =over 4 =item master_key Sets the column name for the first column when dumping hash of lists, hash of hashes or the equivalent 3-D structures. The first column corresponds to the key names of the top hash. =back =head2 close() Does an orderly close of all the writers. Some of the writers need this to clean up data and write file footers properly. Note that DESTROY also calls close. =head2 master_key( [$key] ) Sets the C, returning old value. If called without a parameter, returns current C. =head2 dump( $data ) Analyses C<$data>, then dumps each of it's component objects to the configured files. C is not efficient. It must walk over the data 2 and sometimes 3 times. It may also modify your data, so watch out. May also be called as a package method, in which case it returns a CSV representation of the data. print $fh Data::Tabular::Dumper->dump( $data ); =head2 page_start( $name ) Opens a new page in each file named C<$name>. You must call L if you want it to have a header. For XML, a page is an XML element that wraps all furthur data. The element's name is C<$name> with all non-word characters converted to an underscore (C<$name =~ s/\W/_/g>.) =head2 page_end( $name ) Closes the current page. Please make sure C<$name> is identical to what was passed to C. =head2 fields($fieldref) Sets the column headers to the values in the arrayref $fieldref. Calling this "fields" might be misdenomer. Field headers are often concidered a "special" row of data. =head2 write($dataref) Writes a row of data from the arrayref $dataref. =head1 WRITER OBJECTS An object must implement 4 methods for it to be useable by Data::Tabular::Dumper. =head2 open($package, $p) Create the object, opening any necessary files. C<$p> is the data handed to Data::Tabular::Dumper->open. =head2 close() Do any necesssary cleaning up, like outputing a footer, closing files, etc. =head2 fields($fieldref) Define the names of the fields. C<$fieldref> is an arrayref containing all the field headings. =head2 write($dataref) Write a row of data to the output. C<$dataref> is an arrayref containing a row of data to be output. =head2 page_start($name) =head2 page_end($name) Start and end a new page in the output. If it is called from L, all pages are started and ended with the same C<$name>. If called from user code, all bets are off. =head1 PREDEFINED WRITERS =head2 Data::Tabular::Dumper::XML Produces an XML file of the tabular data. =head2 open($package, [$file_or_fh, $top, $record]) Opens the file C<$file_or_fh> for writing if it is a scalar. Otherwise C<$file_or_fh> is considered a filehandle. The top element is C<$top> and defaults to DATA. Each record is a C<$record> element and defaults to RECORD. =head2 fields($fieldref) Define the tag for each data value. =head2 write($dataref) Output a record. Each item in the arrayref C<$dataref> becomes an element named by the corresponding name set in C. If there are more items in C<$dataref> then fields, the last field name is duplicated. If there are no fields defined, elementes are named 0, 1, etc. Example : $xml=Data::Tabular::Dumper::XML->open(['something.xml']); $xml->fields([qw(foo bar)]); $xml->write([0..5]); Would produce the following XML : 0 1 2 3 4 5 Likewise, $xml=Data::Tabular::Dumper::XML->open(['something.xml']); $xml->dump( [ [ { up=>1, down=>-1, left=>0.5, right=>-0.5 } ] ] ); $xml->close Would produce the following XML : -1 0.5 -0.5 1 =head2 Data::Tabular::Dumper::CSV Produces an CSV file of the tabular data. Each new page is started a row with the page name on it and ending with a blank line. =head2 open($package, [$file_or_fh, $CSVattribs]) Opens the file C<$file_or_fh> for writing if it is a scalar. Otherwise C<$file_or_fh> is considered a filehandle. Creates a Text::CSV_XS object using the attributes in the hashref C<$CSVattribs>. It should be noted that you probably want to set C to C<\n>, otherwise all the output will be on one line. See C for details. Example : $xml=Data::Tabular::Dumper::CSV->open(['something.xml', {eol=>"\n", binary=>1}]); $xml->fields([qw(foo bar)]); $xml->write("me,you", "other"); Would produce the following CSV : foo,bar "me,you",other =head2 fields( $fieldref ) Outputs a row that contains the names of the fields. Basically, it's the same as C. =head2 Data::Tabular::Dumper::Excel Produces an Excel workbook of the tabular data. Each page is a new worksheet. If you want a header on each worksheet, you must call C after each page is started. If you do not call C, a default empty worksheet is used. Note that C handles all this for you. =head2 open($package, [$file]) Creates the workbook C<$file>. =head2 fields($fieldref) Creates a row in bold from the elements in the arrayref C<$fieldref>. =head1 BUGS There are no test cases for all C's edge cases, such as non-heterogeous lower data elements. There is no verification of the Excel workbooks produced. No support for RDBMSes. I'm not fully sure how this would work... each page would be a table? What about lists as the lowest data structure? We'd need a way to match data columns to table columns. C should call C if there is one pending. =head1 AUTHOR Philip Gwyn Egwyn-at-cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright (C) 2005-2009 by Philip Gwyn This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =head1 SEE ALSO L, L, L, L. =cut