The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

use Carp;
use Geo::Raster qw(:types :logics);
#use Timeseries;
use Term::ReadLine;
use Math::MatrixReal;
#use XML::Parser;

$_plot_file = '.grid-tmp';
$_input = '';

=pod

=head1 NAME

rash - raster shell - a Perl shell for raster algebra

=head1 SYNOPSIS

    rash.pl <options>

=head1 DESCRIPTION

rash is an extension of the basic Perl shell:

    while (<>) {
	eval;
	print $@;
    }

=head1 BASIC FUNCTIONALITY

rash uses Geo::Raster, thus rasters and PGPLOT graphics can be used
readily. rash adds command line editing and history with
Term::ReadLine. Database connectivity is added using the functionality
provided by Raster module. rash can open a pipe to gnuplot for
plotting data.

=head1 OPTIONS AND ARGUMENTS

=item B<-a>

Same as specifying both B<--db-connection> and B<--gnuplot>. Tries to
open a database connection into Raster module and a pipe to gnuplot.

=item B<--db-connection>

Tries to open a database connection into Raster module. The default
hostname ('' by Raster.pm) can be overridden with option B<-h>
I<hostname> or argument B<--hostname=>I<hostname>. The default
database (the name of the current directory by Raster.pm) can be
overridden with argument B<--database=>I<database>. The default
username with which to connect (the effective userid by Raster.pm) can
be overridden with argument B<--username=>I<username>.

=item B<--gnuplot>

Tries to open (an unbuffered) pipe GNUPLOT to gnuplot.

Options can be set from the rash command line with command
C<options(..list of options and arguments...)>

=cut

sub options {
    close(GNUPLOT) if $_options{gnuplot};
    db_close() if $_options{db_connection};
    %_options = ();
    my $i = 0;
    while ($i <= $#_) {
	$_ = $_[$i];
	if (/^--database=(\w+)/) {
	    $_options{database} = $1;
	} elsif (/^--username=(\w+)/) {
	    $_options{username} = $1;
	} elsif (/^--debug/) {
	    $_options{debug} = 1;
	} elsif (/^--host\w*=(\w+)/) {
	    $_options{hostname} = $1;
	} elsif (/^-h$/) {
	    $i++;
	    $_options{hostname} = $_[$i];
	} elsif (/^--db-connection$/) {
	    $_options{db_connection} = 1;
	} elsif (/^--gnuplot$/) {
	    $_options{gnuplot} = 1;
	} elsif (/^-a$/) {
	    $_options{db_connection} = 1;
	    $_options{gnuplot} = 1;
	} else {
	    carp "unknown argument: $_\n";
	}
	$i++;
    }
    if ($_options{gnuplot}) {
	open GNUPLOT, "| gnuplot" or carp "can't open gnuplot: $!\n";
	select(GNUPLOT); $| = 1;
	select(STDOUT);
    }
    db_connect(\%_options,{PrintError=>1}) if $_options{db_connection};
}

=pod

=head1 COMMANDS

=head2 output

Command C<output(filename)> directs all output to file F<filename>.
If filename is not given, directs all output to STDOUT.

=cut

sub output {
    my($fn,%o) = @_;
    if ($fn and exists $o{gnuplot_add}) {
	open OUTPUT,">>$fn" or croak("can't open $fn: $!\n");
	print OUTPUT "\n\n";
	select OUTPUT;
    } elsif ($fn) {
	open OUTPUT, ">$fn" or croak "can't open $fn: $!\n";
	select OUTPUT;
    } else {
	close(OUTPUT);
	select STDOUT;
    }
}

=pod

=head2 p

Command C<p> is the standard perl print except when the first
parameter is a reference to a hash or to an array.  If the first
parameter is a hash reference, C<p> sorts the hash numerically and
prints it using two columns (separated by single space) or several
columns if the values are references to arrays. If the first parameter
is a hash reference, C<p> prints the array elements, each on its own
row. If the element is a reference to an array, it is expanded.

=cut

sub p {
    my($this,%o) = @_;
    output($o{file}) if $o{file};
    if (ref($this) eq 'HASH') {
	foreach (sort {$a<=>$b} keys %{$this}) {
	    my $v = $$this{$_};
	    if (ref($v) eq 'ARRAY') {
	      print "$_ @{$v}\n";
	    } else {
	      print "$_ $v\n";
	    }
	}
    } elsif (ref($this) eq 'ARRAY') {
	foreach (@{$this}) {
	    if (ref($_) eq 'ARRAY') {
	      print "@{$_}\n";
	    } else {
	      print "$_\n";
	    }
	}
    } else {
      print @_,"\n";
    }
    output() if $o{file};
}

=pod

=head2 plot

Command C<plot(argument,options)> plots the argument using
gnuplot. The argument should be either a reference to a hash (the
returned value of raster method C<contents>, C<histogram>, or some such)
or a string. The options should be a hash, i.e, a list of key, value
-pairs written using the format: key1=>value1, key1=>value1, ...

For example:

C<plot($raster->contents,title=>'cell count of raster',with=>'impulses');>

A hash argument is printed to a temporary file (F<.raster-tmp>)
and the command line

C<plot I<xrange> ".raster-tmp" I<title> with I<with>>

I<range> = '' or [I<keymin-1>:I<keymax+1>] if with equals 'impulses'

I<title> = 'notitle' or 'title B<title>' if B<title> is given in the command line as an option title=>'B<title>'

I<with> = 'lines' or what is given as an option with=>'B<with>'

is piped to gnuplot.

A string argument is piped to gnuplot as a part of the command line

C<plot I<xrange> I<yrange> argument I<title>>

xrange, yrange, and title are empty strings or those given as options.
If argument is a readable file, it is surrounded by double quotation marks.

Gnuplot can be instructed to plot into a png-file using option file=>1.

=cut

sub gnuplot {
    my $line = shift;
    $line = '' unless $line;
    print "$line\n" if $_options{debug};
    print GNUPLOT "$line\n";
}

sub plot {
    my($this,%o) = @_;
    if ($o{file}) {
	gnuplot("set terminal png");
	gnuplot("set output \"$o{file}.png\"");
    }
    $o{with} = 'lines' unless $o{with};
    my $xrange = $o{xrange} ? $o{xrange} : '';
    my $yrange = $o{yrange} ? $o{yrange} : '';
    $o{title} = '' unless $o{title};
    my $using = $o{using} ? 'using ' . $o{using} : 'using 1:2';
    my $other = $o{other} ? ', ' . $o{other} : '';

    gnuplot("set xdata");
    gnuplot("set format x");

    # the plottable may be a HASH ref, ARRAY ref, Timeseries, or an array of those
    # support only array of Timeseries for now

    my $plottable = 'datafile';
    my @datasets = ($this);
    my @title;
    my @with;
    if (ref($this)) {
	if (ref($this) eq 'ARRAY') {
	    @datasets = @{$this};
	    if (ref($this->[0]) eq 'Timeseries') { # list of timeseries
		$plottable = 'timeseries';
	    } else { # list of arrays
		$plottable = 'array';
	    }
	    for my $set (0..$#datasets) {
		$title[$set] = ref($o{title}) ? $o{title}->{$set} : $o{title};
		$with[$set] = ref($o{with}) ? $o{with}->{$set} : $o{with};
	    }
	} elsif (ref($this) eq 'HASH') {
	    $plottable = 'hash';
	    my $set = 0;
	    $with[$set] = ref($o{with}) ? $o{with}->{$set} : $o{with};
	    $title[$set] = ref($o{title}) ? $o{title}->{$set} : $o{title};
	    foreach my $name (sort keys %{$this}) {
		if (ref($this->{$name}) eq 'Timeseries') { # hash of timeseries
		    $plottable = 'timeseries';
		    $datasets[$set] = $this->{$name};
		    if ($o{title}) {
			$title[$set] = ref($o{title}) ? $o{title}->{$set} : $o{title};
		    } else {
			$title[$set] = $name;
		    }
		    $with[$set] = ref($o{with}) ? $o{with}->{$set} : $o{with};
		    $set++;
		}
	    }
	} elsif (ref($this) eq 'Timeseries') {
	    $plottable = 'timeseries';
	} else {
	    croak "don't know how to plot a " . ref($this) . "\n";
	}
    }

    my @what; # = for each dataset: <function> | {"<datafile>" {datafile-modifiers}} 
    my @index;
    my @using;
    if ($plottable eq 'array' or $plottable eq 'hash') {
	my($minx,$maxx);
	my $r = 0;
	for my $set (0..$#datasets) {
	    unless (ref($datasets[$set])) {
		$what[$set] = $datasets[$set];
		$index[$set] = '';
		$using[$set] = '';
		next;
	    }
	    output($_plot_file, $set ? (gnuplot_add=>1) : (0=>0));
	    p $datasets[$set];
	    output;
	    $what[$set] = "\"$_plot_file\"";
	    $index[$set] = "index $set";
	    $using[$set] = $using;
	    if ($with[$set] eq 'impulses') {
		$r = 1;
		if (ref($datasets[$set]) eq 'HASH') {
		    foreach (keys %{$this}) {
			$minx = $_ if !defined($minx) or $_ < $minx;
			$maxx = $_ if !defined($maxx) or $_ > $maxx;
		    }
		} else {
		    foreach (@{$datasets[$set]}) {
			$minx = $$_[0] if !defined($minx) or $$_[0] < $minx;
			$maxx = $$_[0] if !defined($maxx) or $$_[0] > $maxx;
		    }
		}
		$minx--;
		$maxx++;
	    }
	}
	$xrange = "[$minx:$maxx]" if $r;
    } elsif ($plottable eq 'timeseries') {
	for my $set (0..$#datasets) {
	    $with[$set] = ref($o{with}) ? $o{with}->{$set} : $o{with};
	    if ($o{scaled}) {
		$datasets[$set]->scale->save($_plot_file, $set ? (gnuplot_add=>1) : (0=>0));
	    } else {
		$datasets[$set]->save($_plot_file, $set ? (gnuplot_add=>1) : (0=>0));
	    }
	    $what[$set] = "\"$_plot_file\"";
	    $index[$set] = "index $set";
	    $using[$set] = $using;
	}
	gnuplot("set xdata time");
	gnuplot("set timefmt \"%Y%m%d\"");
	gnuplot("set format x \"%d.%m\\n%y\"");
    } else {
	$title[0] = $o{title} ? $o{title} : '';
	$with[0] = $o{with} ? $o{with} : 'points';
	$index[0] = 'index 0';
	$using[0] = $using;
	if (-r $this) {
	    $what[0] = "\"$this\"";
	} else {
	    $what[0] = $this;
	    $using[0] = '';
	}
    }

    if ($#datasets == 0) {
	gnuplot("plot $xrange$yrange $what[0] $using[0] title \"$title[0]\" with $with[0]" . $other);
    } else {
#	unless (@names) {
#	    @names = $_input =~ /\$[a-zA-Z]\w*/g;
#	    unless (@names) {
#		@names = (0..$#datasets);
#	    }
#	}
	$title[0] = '' unless $title[0];
	my $plot = "plot $xrange$yrange $what[0] $index[0] $using[0] title \"$title[0]\" with $with[0]";
	for my $set (1..$#datasets) {
	    $title[$set] = '' unless $title[$set];
	    $plot .= ", $what[$set] $index[$set] $using[$set] title \"$title[$set]\" with $with[$set]";
	}
	gnuplot($plot . $other);
    }

    gnuplot("set xdata") if $plottable eq 'timeseries';
    if ($o{file}) {
	gnuplot("set terminal x11");
	gnuplot("set output");
    }
}

sub fit {
    my($data,$fct) = @_;
    my @params = $fct =~ /\b[a-zA-Z]\w*\b/g;
    my %params = map {$_=>1} @params;
    @params = ();
    foreach (keys %params) {
	push @params,$_ unless /^[xyz]/;
    } 
    my $params = join(',',@params);
    print "using params: $params\n";
    gnuplot("f(x) = $fct");
    gnuplot("FIT_LIMIT = 1e-6");
    output($_plot_file);
    p($data);
    output();
    gnuplot("fit f(x) '$_plot_file' via $params");
}

=pod

=head2 slurp

Command C<slurp(filename,options)> reads the contents of a file into a
hash (or an array, if option array=>1 is given). It is assumed that
the file contains data in two (for hash) or more columns (for a
array). In the case of an array the array values are references to
arrays which each hold the values of one row of data.

=cut

sub slurp {
    my($file,%o) = @_;
    if (!open S,$file) {
	print("$file: $!\n");
	return;
    }
    my $ret;
    if ($o{array}) {
	$ret = [];
    } else {
	$ret = {};
    }
    while (<S>) {
	chomp;
	s/^\s+//;
	s/\s+$//;
	next if /^#/;
	next if $_ eq '';
	last if $_ eq '__END__';
	my(@l) = split /[\s,]+/;
	if ($o{array}) {
	    my $l = [@l];
	    $ret->[$#$ret+1] = $l;
	} else {
	    $ret->{$l[0]} = $l[1];
	}
    }
    close S;
    return $ret;
}

sub nh {
    my($gd,$i0,$j0,$k) = @_;
    $k = 1 if !$k;
    my($i,$j);
    for $i ($i0-$k..$i0+$k) {
	for $j ($j0-$k..$j0+$k) {
	    print $gd->get($i,$j),' ';
	}
	print("\n");
    }
}

=pod

=head1 vars

Command C<vars> lists all variables, and if they are references, the
type of data which they refer to.

=cut

sub vars {
    my @scalars;
    my @arrays;
    my @hashes;
    foreach (sort keys %main::) {
	next if /^_/;
#	next if /^[A-Z]/;
	next if /^$/;
	next if /^scalars$/;
	next if /^[^a-z]/;
	next if /::/;
	local *sym = $main::{$_};
	my $scalar = ref($sym);
	$scalar = 'scalar' unless $scalar;
	push @scalars, "$scalar \$$_ is defined\n" if defined($sym);
	push @arrays, "array \@$_ is defined\n" if defined(@sym);
	push @hashes, "hash \%$_ is defined\n" if defined(%sym);
    }
    foreach (@scalars) {
	print;
    }
    foreach (@arrays) {
	print;
    }
    foreach (@hashes) {
	print;
    }
}

=pod

=head1 SIGINT

rash installs a SIGINT handler which may used to cancel lengthy raster
operations.

=cut

sub cntrlc {
    my($sig) = @_;
    &Raster::gdsigint(1);
}

=pod

=head1 HELP

Commands C<?> and C<help> run C<perldoc rash> thus showing this
manual page.

Command C<? Raster> and C<help Raster> run C<man Raster> thus showing the
manual page of the Raster module.

=head1 EXECUTING SYSTEM COMMANDS

A command line which begins with '!' is interpreted as a system
command.

=cut

sub a2xy {
    my $a = shift;
    my @xy;
    my $i = 0;
    foreach (@{$a}) {
	push @xy,[$i,$_];
	$i++;
    }
    return [@xy];
}

options(@ARGV);
$SIG{INT} = 'cntrlc';
$_term = new Term::ReadLine 'rash';
my $_hfile = "$ENV{HOME}/.rash_history";
$_term->ReadHistory($_hfile);
while ( defined ($_ = $_term->readline('>')) ) {
    chomp;
    $_input = $_;
    if (/^\?$/ or /^help$/i) {
	system "perldoc $0";
    } elsif (/^\? Raster$/ or /^help Raster$/i) {
	system "man Raster";
    } elsif (/^\!(.*)/) {
	system $1;
    } else {
	eval;
	print $@;
    }
#    $_term->addhistory($_input) if $_input =~ /\S/;
}
$_term->WriteHistory($_hfile);
db_close() if $_options{db_connection};
if ($_options{gnuplot}) {
    close(GNUPLOT);
    unlink $_plot_file;
}
print "\n";


##### cellular automatons a'la new kind of science

sub ca {
    my $g = shift;
    my $rule = shift;
    unless (ref($rule)) {
	my @rule = ();
	my $e = 128;
	for (0..7) {
	    if ($rule >= $e) {
		$rule[$_] = 1;
		$rule -= $e;
	    }
	    $e /= 2;
	}
	@{$rule} = @rule;
    }
    my ($M,$N) = $g->size;
    $g *= 0;
    $g->set(0,int($N/2),1);
    for my $i (1..$M-1) {
	my $l = $g->get($i-1,0);
	my $m = $g->get($i-1,1);
	for my $j (1..$N-2) {
	    my $r = $g->get($i-1,$j+1);
	    $g->set($i,$j,1) if $rule->[0] and $l and $m and $r;
	    $g->set($i,$j,1) if $rule->[1] and $l and $m and !$r;
	    $g->set($i,$j,1) if $rule->[2] and $l and !$m and $r;
	    $g->set($i,$j,1) if $rule->[3] and $l and !$m and !$r;
	    $g->set($i,$j,1) if $rule->[4] and !$l and $m and $r;
	    $g->set($i,$j,1) if $rule->[5] and !$l and $m and !$r;
	    $g->set($i,$j,1) if $rule->[6] and !$l and !$m and $r;
	    $g->set($i,$j,1) if $rule->[7] and !$l and !$m and !$r;
	    $l = $m;
	    $m = $r;	    
	}
    }
}


############### STATISTICAL ANALYSIS TOOLPACK

##### helper functions 

sub size { # this is a row-based size
    my $A = shift;
    if (ref($A) eq 'Math::MatrixReal') {
	my ($m,$n) = $A->dim;
	return ($m,$n);
    } elsif (ref($A) eq 'ARRAY') {
	my $m = $#$A + 1;
	my $n;
	for (@{$A}) {
	    last unless ref($_) eq 'ARRAY';
	    $n = $#$_ + 1 if !$n or $#$_ + 1 > $n;
	}
#	print "size is $m x $n (row-based)\n";
	return ($m,$n);
    }
#    print "is not a matrix\n";
}

sub transpose {
    my $A = shift;
    my $AT;
    if (ref($A) eq 'Math::MatrixReal') {
	my ($r,$c) = $A->dim;
	$AT = new Math::MatrixReal $c,$r;
	$AT->transpose($A);
    } else {
	for $i (0..$#$A) {
	    for $j (0..$#{$A->[$i]}) {
		$AT->[$j]->[$i] = $A->[$i]->[$j];
	    }
	}
    }
    return $AT;
}

sub inverse {
    my $A = shift;
    my $LR_matrix = $A->decompose_LR();
    my $Ainv = $LR_matrix->invert_LR();
    return $Ainv;
}

##### correlations, input is [[column1],[column2]] columns are sample vectors
##### you may get X like this for example:

# $d = sql("select parno,thetki,arvo from data where stun=21 order by thetki");
# $t = Timeseries::hash($d);
# @t=();foreach (keys %{$t}) {push @t,$t->{$_}}
# Timeseries::intersection(@t);
# @X=();foreach (sort keys %{$t}) {push @X,$t->{$_}->value_array}

#### if X is from Timeseries::xy, transpose it first

sub correlations {
    my($X) = @_;

    my @one = @{$$X[0]};
    foreach (@one) {$_=1}
    unshift @{$X},[@one];

    $X = new_from_cols Math::MatrixReal $X;
    my($n,$k) = $X->dim;
    $k--;

    my $XT = transpose($X);
    my $A = $XT * $X;

    my $R;
    for my $i (1..$k) {
	my $Sii = $A->element($i+1,$i+1) - $A->element($i+1,1)**2/$n;
	for my $j (1..$k) {
	    my $Sij = $A->element($i+1,$j+1) - 
		$A->element($i+1,1)*$A->element($j+1,1)/$n;
	    my $Sjj = $A->element($j+1,$j+1) - $A->element($j+1,1)**2/$n;
	    $R->[$i-1]->[$j-1] = $Sij/sqrt($Sii*$Sjj);
	}
    }
    return $R;
}

##### multiple linear regression, input is similar as above, 
##### X contains sample vectors of independent variables
##### y contains the sample vector of dependent variable

# test: Walpole, Myers 9.3

sub test_linreg {
    @X = ([1.74,6.32,6.22,10.52,1.19,1.22,4.10,6.32,4.08,4.15,10.15,1.72,1.70],
	  [5.30,5.42,8.41,4.63,11.60,5.85,6.62,8.72,4.42,7.60,4.83,3.12,5.30],
	  [10.80,9.40,7.20,8.50,9.40,9.90,8.00,9.10,8.70,9.20,9.40,7.60,8.20]);
    @y = (25.5,31.2,25.9,38.4,18.4,26.7,26.4,25.9,32.0,25.2,39.7,35.7,26.5);
    $r = linreg(\@X,\@y);
    p($r);
}


sub linreg {
    my($X,$y) = @_;

    my @X = @{$X};
    my @one = @{$X[0]};
    foreach (@one) {$_=1}
    unshift @X,[@one];

    $X = new_from_cols Math::MatrixReal [@X];
    $y = new_from_cols Math::MatrixReal [$y];
    my($n,$k) = $X->dim;
    $k--;

    my $XT = transpose($X);
    my $A = $XT * $X;
    my $g = $XT * $y;
    my $Ainv = inverse($A);
    croak "no solution\n" unless $Ainv;
    my $b = $Ainv * $g;

    my @b;
    for my $i (1..$k+1) {
	$b[$i-1] = $b->element($i,1);
    }

    my $temp = $g->element(1,1)**2/$n;

    my $SST = (transpose($y) * $y)->element(1,1) - $temp;
    my $SSR = (transpose($b) * $g)->element(1,1) - $temp;
    my $SSE = $SST - $SSR;
    my $R2 = $SSR/$SST;

    my $dof = $n-$k-1;
    my $s2 = $SSE/$dof;

    my $f = $SSR/$k/$s2;

    my @m;
    for my $i (1..$#b) {
	push @m,"b$i*x$i";
    }
    my $m = join(' + ',@m);

#    print "model: y = b0 + $m\n";
#    print "n = $n, k = $k, dof = $dof\n";
#    print "SST = $SST\n";
#    print "SSR = $SSR\n";
#    print "SSE = $SSE\n";
#    print "R^2 = $R2\n";
#    print "f = $f\n";

    my @v;
    my @t;
    for my $i (0..$#b) {
	$v[$i] = $s2 * $Ainv->element($i+1,$i+1);
	$t[$i] = $b[$i] / sqrt($v[$i]);
#	print "b$i = $b[$i], s2 = $v[$i], t = $t[$i]\n";
    }
    return [[@b],[@v],[@t],[$SST,$SSR,$SSE,$R2,$f],[$n,$k,$dof]];
}