#! /Users/jj/bin/perl # XLSperl - use "perl -e" commands with Microsoft Excel files # # Copyright (C) 2007-2008 Jon Allen # # This software is licensed under the terms of the Artistic # License version 2.0. # # For full license details, please read the file 'artistic-2_0.txt' # included with this distribution, or see # http://www.perlfoundation.org/legal/licenses/artistic-2_0.html #--Load required modules and activate Perl's safety features---------- use strict; use warnings; use bytes; use open IN => ":raw"; use Getopt::Long; use IO::Handle; use Spreadsheet::ParseExcel; use Spreadsheet::ParseExcel::Utility qw/int2col/; use Variable::Alias qw/alias/; our $VERSION = "0.7"; #--Define global variables------------------------------------------------- use vars qw/$WS $ROW $COL $COLNUM $CELL @F %F/; #--Main program code------------------------------------------------------- MAIN: { my %options; Getopt::Long::Configure('bundling'); GetOptions( \%options, qw/e=s l n p x a F=s w/, 'm=s' => sub{ eval qq{use $_[1] ()}; die $@ if $@;}, 'M=s' => sub{ my ($m,$a) = split /=/,$_[1],2; if ($a) { eval qq{use $m split(/,/,q{$a})}; die $@ if $@; } else { eval qq{use $m}; die $@ if $@; } }, 'v' => sub{ print "This is XLSperl, version $VERSION\n"; exit } ); die(qq{Usage: $0 [options] -e "code" \n}) unless ($options{e}); no strict 'refs'; *{__PACKAGE__ . '::XLSprint'} = \&XLSperl::printer::XLSprint; *{'IO::Handle::XLSprint'} = \&XLSperl::printer::XLSprint; use strict 'refs'; my $eval_function = create_eval_handler(%options); $\ = "\n" if ($options{l}); if ($options{n} or $options{p}) { $eval_function = make_loop( perl => $eval_function, autosplit => $options{a}, splitregex => $options{F}, autoline => $options{l} ); } $eval_function->(); } #--make_loop--------------------------------------------------------------- # # Purpose: Converts an eval function into a loop, iterating over files # in @ARGV # #-------------------------------------------------------------------------- sub make_loop { my %args = @_; my $next_file = file_iterator(); return sub { while (my $data = $next_file->()) { my $excel = Spreadsheet::ParseExcel::Workbook->Parse(\$data); if ($excel->{Worksheet}) { loop_xls(%args, xls=>$excel); } else { if (open my $fh,'<',\$data) { loop_text(%args, fh=>$fh); } } } } } #--file_iterator----------------------------------------------------------- # # Purpose: Returns a function to loop through , returning file contents # #-------------------------------------------------------------------------- sub file_iterator { return sub { local $/ = undef; my $file = <>; return $file or undef; } } #--loop_xls---------------------------------------------------------------- # # Purpose: Loops through XLS file, executing $args{perl} # #-------------------------------------------------------------------------- sub loop_xls { my %args = @_; foreach my $worksheet (@{$args{xls}->{Worksheet}}) { $WS = $worksheet->{Name}; my ($row_min,$row_max) = $worksheet->RowRange(); my ($col_min,$col_max) = $worksheet->ColRange(); foreach my $row ($row_min .. $row_max) { $ROW = $row + 1; if ($args{autosplit}) { @F = map {$_ && $_->Value() || undef} @{$worksheet->{Cells}[$row]}; @F = map {$_ && chomp;$_} @F if ($args{autoline}); # Make the %F hash an alias for @F, i.e. $F{A} == $F[0] for (0 .. $#F) { alias($F[$_],$F{int2col($_)}); } undef $_; # What should $_ be if autosplitting an Excel file? $args{perl}->(); } else { foreach my $col ($col_min .. $col_max) { $COLNUM = $col + 1; $COL = int2col($col); if (my $cell = $worksheet->Cell($row,$col)) { $CELL = int2col($col).$ROW; $_ = $cell->Value(); chomp if ($args{autoline}); $args{perl}->(); } } } } } } #--loop_text---------------------------------------------------------------- # # Purpose: Loops through text file, executing $args{perl} # #-------------------------------------------------------------------------- sub loop_text { #warn "loop_text() called\n"; my %args = @_; while ($_ = $args{fh}->getline) { chomp if ($args{autoline}); if ($args{autosplit}) { if ($args{splitregex}) { @F = split $args{splitregex}; } else { @F = split; } } $args{perl}->(); } } #--Subroutine to create cell exec function --------------------------- sub create_eval_handler { my %args = @_; # Create and return a coderef (closure) to ensure that the Perl code # in $args{e} is only evaluated once - using a direct eval in the # input loop will cause any END{} blocks to be defined multiple times. my $handler = eval ' return sub { EVAL: { no strict; '.(($args{w})?'use warnings;':'no warnings;').$args{e}.'; print if ($args{p}); } } '; die($@) if ($@); return $handler; } #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- package XLSperl::printer; use strict; use warnings; use Scalar::Util qw/openhandle/; use Spreadsheet::WriteExcel::Simple; our %XLSobject; END { foreach (keys %XLSobject) { print {$XLSobject{$_}{FH}} $XLSobject{$_}{XLS}->data; } } sub XLSprint { my $currfh = select(); my $handle; { no strict 'refs'; $handle = openhandle($_[0]) ? shift : \*$currfh; use strict 'refs'; } @_ = $_ unless @_; unless ($XLSobject{$handle}) { my $XLSfile = Spreadsheet::WriteExcel::Simple->new(); $XLSobject{$handle}{FH} = $handle; $XLSobject{$handle}{XLS} = $XLSfile; } $XLSobject{$handle}{XLS}->write_row(\@_); } #-------------------------------------------------------------------------- =head1 NAME XLSperl - use Perl "one-liners" with Microsoft Excel files =head1 SYNOPSYS XLSperl [options] -e '...' file1.xls file2.xls ... fileX.xls cat file.txt | XLSperl [options] -e '...' =head1 DESCRIPTION =head2 Background Perl "one-liners" have a great many uses for quick data processing tasks, often replacing the UNIX tools C, C, and C. For example, a simple "grep" function can be written as: perl -lne '/pattern/ and print' file.txt which improves on the standard C function by allowing the extended features of Perl regular expressions to be used. However, this form of processing is only suitable for data that can be read (or needs to be written) in plain text format. XLSperl lets you use the same commands to process and create Microsoft Excel files, e.g. the following command will "grep" an Excel document: XLSperl -lne '/pattern/ and print' file.xls =head2 Usage Basic usage of XLSperl is as follows: XLSperl [options] -e 'perl_code' file1.xls file2.xls ... fileX.xls Additionally Microsoft Excel files can be piped in to XLSperl cat file.xls | XLSperl [options] -e 'perl_code' =head2 Options Options to XLSperl mirror the standard options to C: =over =item -n Assumes an input loop which will iterate over each cell in the spreadsheet, assigning $_ with the cell's value. =item -a Changes the input loop to process Excel files row by row, splitting column values to @F and %F. =item -F Sets input record separator when processing text files (defaults to splitting on whitespace) =item -l Automated line-end processing - chomps the value of each cell and sets $/ to "\n" =item -p Prints the value of $_ after each iteration of the input loop =item -e Perl code to execute on each iteration of the input loop (required) =item -m =item -M The C<-m> and C<-M> options load additional modules into XLSperl in the same way as the C<-m> and C<-M> options to C (see L. =item -w Enables warnings =item -v Prints version number and exits =back =head2 Special variables XLSperl adds the following special varables: =over =item $WS Worksheet name =item $ROW Current row (1 .. x) =item $COL Current column name (A .. x) =item $COLNUM Current column number (1 .. x) =item $CELL Current cell (A1 .. ZZx) =item @F Array of cell values (in autosplit mode) =item %F Hash of cell values (in autosplit mode). Entries in %F are aliases for the corresponding elements of @F, so modifying $F{A} will also update the value of $F[0] and vice-versa. =back =head2 Exported functions XLSperl adds the following new functions for use in your Perl code: =over =item XLSprint( cell_1, cell_2, ... cell_X ); Outputs a row of data in Excel format. Usage is as follows: # Print row to default filehandle XLSprint @F; # Print row to named filehandle XLSprint STDERR @F; XLSprint $fh,@F; Note that once a filehandle has been used with the XLSprint function, to avoid corruption of the generated Excel file the 'normal' print function should not be used on that filehandle, i.e. do not do this: XLSprint STDERR @errors; warn "An error happened"; =back =head2 Examples =over =item Use a regular expression to extract data from a spreadsheet XLSperl -nle "/[A-Z](\d+)\d/ and print $1" cells.xls =item Basic conversion from XLS to CSV XLSperl -nale 'print join ",",@F' file.xls >file.csv =item More correct conversion from XLS to CSV (thanks to Sam Vilain): XLSperl -MText::CSV_XS -nale 'BEGIN{$c=Text::CSV_XS->new} if ($w ne $WS){open CSV,">$ARGV.$WS.csv" or die $!;$w = $WS} $c->print(\*CSV,\@F)' file1.xls file2.xls =item Extract a single row from a spreadsheet cat file1.xls | XLSperl -nle 'print if ($ROW == 2)' =item Convert a text file to Excel format, removing comment lines XLSperl -F: -nale 'next if /^#/; XLSprint @F' /etc/passwd >passwd.xls =back =head1 SYSTEM REQUIREMENTS XLSperl binary packages have no external dependencies, and have been tested on the following platforms: =over =item * Linux i686 (tested on Ubuntu version 6.0.6) =item * Microsoft Windows (tested on Windows XP SP 2) =item * Mac OS X (tested on OS X 10.5.3, Intel CPU only) =back To run XLSperl from source, the following CPAN modules must be installed: =over =item * Spreadsheet::ParseExcel (tested with version 0.28) =item * Spreadsheet::WriteExcel::Simple (tested with version 1.04) =item * Variable::Alias (tested with version 0.01) =back XLSperl has been tested with Perl versions 5.8.8 and 5.10.0. =head1 TODO =over =item * Include a mechanism for creation and in-place editing of Excel documents. =item * Ability to use XLSperl as a command interpreter (C<#! /usr/bin/XLSperl> in scripts). =back =head1 SEE ALSO =over =item XLSperl homepage - L =item I talk slides - L =item I by Tim Maher - L =back =head1 AUTHOR Written by Jon Allen =head1 COPYRIGHT and LICENSE Copyright (C) 2007 Jon Allen This software is licensed under the terms of the Artistic License version 2.0. For full license details, please read the file 'artistic-2_0.txt' included with this distribution, or see http://www.perlfoundation.org/legal/licenses/artistic-2_0.html =cut