#!/usr/bin/perl -w ###################################################### # history.pl ###################################################### # Author: liangc # $Id: history.pl,v 1.7 2006/08/24 06:41:57 thladish Exp $ # create ancestral intron states based on an alignment and tree # in a nexus file. The BUGS program will run many times with small # number of generations to find those can converge (to global optimum) # and then run one with a large number of generations (convergence) # the ancestral states will be computed from the average value of # those generations and write back to file in history block use strict; use Pod::Usage; use Data::Dumper; use Getopt::Long; use Bio::NEXUS; my $version = "\$Revision\$"; Getopt::Long::Configure("bundling"); # for short options bundling my %opts = (); GetOptions(\%opts, 'help|h', 'man', 'v', 'l=s','n=s', 'b=s', 'm=s', 'debug') or pod2usage(2); if ( $opts{'man'} ) { pod2usage(-exitval => 0, -verbose => 2) } if ( $opts{'help'} ) { pod2usage(1) } if ($opts{v}) { print "history.pl $version\n"; exit; } my $infile = shift; unless ($infile) {die "history.pl [options] \n";} my $burnin = defined $opts{b} ? $opts{b} : 1000; # burnin generation # my $monitor = $opts{m} || 1000; # monitor # my $good_num = $opts{n} || 3; # the number of files to be selected my $runlevel = defined $opts{l} ? $opts{l} : 0; my $debug = $opts{debug}; my $stem = $infile; $stem =~ s/^(.+).nex$/$1/; $stem =~ s/^(.+\/)?([^\/]+)$/$2/; #print "$1 $stem\n"; exit; $stem =~ s/[^a-zA-Z0-9.]/\./g; # remove non alphanumeric characters in name my $dir = "$stem.dir"; system "mkdir $dir"; system "cp $infile $dir/$stem.nex"; $infile = "$stem.nex"; chdir "$dir"; my @data; # all monitored data in file my ($llike, $alpha, $beta, $r, $k, @node); my $maxlike = -1.E+20; my $minfile = 0; my $maxfile = 2; my @like; my $names; # name of each node in the index file #print "$runlevel--$burnin--$monitor\n";exit; if ($runlevel == 0) { exec_bugs(); re_exec_bugs($minfile, $maxfile, 50, 50); rename "$stem.$maxfile.bug", "$stem.0.bug"; } if ($runlevel < 2) { re_exec_bugs(0, 0, $burnin, $monitor); }else { read_bugs_out("$stem.0.bugs1"); } write_data(0); modify_nexus(); exit; # select runs not trapped locally sub exec_bugs { $maxfile = 0; for (my $i = 0; $i < 20; $i++) { system ("bugs.pl $stem.$i $infile > $stem.$i.bug") == 0 or die "BUGS execution error!\n"; system "bugs <$stem.$i.cmd >$stem.$i.bugs.log"; read_bugs_out("bugs1"); print "$i: ", $llike->[0], "\n" if $debug; if ( $alpha->[1] != 0 && abs($alpha->[0]/$alpha->[1]) < 1000 && $beta->[1] != 0 && abs($beta->[0]/$beta->[1]) < 1000 && $r->[1] != 0 && abs($r->[0]/$r->[1]) < 1000 && $k->[1] != 0 && abs($k->[0]/$k->[1]) < 1000 ) { print " move to $stem.$maxfile ", $llike->[0], "\n" if $debug; rename "$stem.$i.bug", "$stem.$maxfile.bug"; $like[$maxfile] = $llike->[0]; $maxfile++; } if ($maxfile == $good_num) {last;} } if (--$maxfile == -1) {print "Error for file: $infile; no good starting was found\n"; exit; } print join(' ', @like), "\n" if $debug; } # select a run with maximum mean probability sub re_exec_bugs { my ($bugs1, $bugs2, $burnin, $monitor) = @_; # if ($bugs1 == $bugs2) { die "-----Error: $stem\n"; } # my ($nchar, $nnodes) = read_node_num("$stem.$bugs1.bug"); # $burnin = 1000; #int(sqrt(sqrt(sqrt($nchar*$nnodes+1))))*1000; # print "burnin: $burnin\n"; foreach my $i ($bugs1..$bugs2) { print "$i running...\n" if $debug; system ("bugs.cmd.pl $stem.$i $burnin $monitor") == 0 or die "BUGS error: $infile\n"; system "bugs <$stem.$i.cmd >$stem.$i.bugs.log"; rename "bugs.ind", "$stem.$i.bugs.ind"; rename "bugs.out", "$stem.$i.bugs.out"; rename "bugs1.ind", "$stem.$i.bugs1.ind"; rename "bugs1.out", "$stem.$i.bugs1.out"; read_bugs_out("$stem.$i.bugs1"); select_file($i); $like[$i] = $llike->[0]; } print join(' ', @like), "\n" if $debug; } # select the run to be used sub select_file { my $i = shift; if ($llike->[0] > $maxlike) { $maxlike = $llike->[0]; $maxfile = $i; } } # read .out file sub read_bugs_out { my $bugstem = shift; read_ind($bugstem); open (INFILE, "$bugstem.out") or die "Error open file\n"; for (my $i = 1; my $line = ; $i++) { # print "$line\n"; exit; $line =~ s/^\s+//; ${$data[$i]} = [split /\s+/, $line]; } close (INFILE); } #read index file sub read_ind { my $bugstem = shift; open(IND, "$bugstem.ind"); while (my $line = ) { $line =~ s/^s+//; my ($v1, $v2) = split /\s+/, $line; if (!$v1) { print "$line, -- $v1, $v2\n"; exit;} if ($v1 eq 'llike') { $data[$v2] = \$llike; }elsif ($v1 eq 'r') { $data[$v2] = \$r; }elsif ($v1 eq 'k') { $data[$v2] = \$k; }elsif ($v1 eq 'alpha') { $data[$v2] = \$alpha; }elsif ($v1 eq 'beta') { $data[$v2] = \$beta; }elsif ($v1 =~ /^root/) { my $name = $v1; $name =~ /\[(\d+)\]/; # print "$1\n"; $data[$v2] = \$node[0][$1]; # [intron#][first node--root], }elsif ($v1 =~ /^node/) { my $name = $v1; $name =~ /\[(\d+),(\d+)\]/; # print "$2, $1\n"; $data[$v2] = \$node[$1][$2]; # [intron#][other internal node] } } close(IND); } # write bugs out to a file sub write_data { my ($file) = @_; # print &Dumper($llike); exit; # print scalar @{$node[1]}, "\n"; exit; # print &Dumper($node[0]);exit; open(OUTFILE, ">$stem.node.bugs.out"); my ($nnodes, $nchar) = read_node_num("$stem.$file.bug"); print OUTFILE "inodes\t", $nnodes, "\n"; print OUTFILE "characters\t", $nchar, "\n"; my @llike = map {$_->[0]} @{[$llike]}; my @r = map {$_->[0]} @{[$r]}; my @k = map {$_->[0]} @{[$k]}; my @alpha = map {$_->[0]} @{[$alpha]}; my @beta = map {$_->[0]} @{[$beta]}; print OUTFILE "loglikelihood\t", join(' ', @llike), "\n"; print OUTFILE "alpha\t", join(' ', @alpha), "\n"; print OUTFILE "beta\t", join(' ', @beta), "\n"; print OUTFILE "r\t", join(' ', @r), "\n"; print OUTFILE "k\t", join(' ', @k), "\n"; $names = read_node_name("$stem.$file.log"); for (my $i = 0; $i < @node; $i++) { print OUTFILE $names->{$i}, "\t"; for (my $j = 1; $j < @{$node[0]}; $j++) { printf OUTFILE "%4.3f\t", $node[$i][$j][0]-1; } print OUTFILE "\n"; } close(OUTFILE); } # convert internal node names used in bugs run sub read_node_name { my $file = shift; open(LOG, "$file"); my %names; $names{0} = 'root'; while () { if (/^\s+$/) {last;} } while () { /(\d+):\s*(\S+)/; $names{$1} = $2; } close(LOG); # print &Dumper(%names);exit; return \%names; } # find out the number of parameters based on tree size and character num sub read_node_num { my $file = shift; open(BUG, "$file"); my ($nchar, $nnodes); while () { if (/nChar = (\d+)/) {$nchar = $1;} elsif (/nNode = (\d+)/) {$nnodes = $1+1;} elsif ($nchar && $nnodes) {last;} } close(BUG); return ($nnodes, $nchar); } # modifying input nexus file (with new name) sub modify_nexus { my $nexus = new Bio::NEXUS($infile); create_history_block($nexus, \@node); modify_span_block($nexus); $nexus->write("../${stem}.h.nex"); } # create history block based on BUGS output sub create_history_block { my ($nexus, $nodes) = @_; my $otuset = $nexus->get_block('characters', 'intron')->get_otuset()->clone(); my @otus; for (my $i = 0; $i < @$nodes; $i++) { my @seq; for (my $j = 1; $j < @{$nodes->[0]}; $j++) { push @seq, [sprintf("%4.3f", 2-$nodes->[$i][$j][0]), (sprintf"%4.3f", $nodes->[$i][$j][0]-1)]; } push @otus, new Bio::NEXUS::TaxUnit($names->{$i}, \@seq); } # @otus = sort { $a->get_name cmp $b->get_name } @otus; my $history = new Bio::NEXUS::HistoryBlock('History'); foreach my $otu (@otus) { $otuset->add_otu($otu); } $history->set_title('intron'); $history->add_link('taxa', $nexus->get_name); $history->set_format($nexus->get_block('characters', 'intron')->get_format()); $history->set_otuset($otuset); $history->add_tree($nexus->get_block("trees")->get_tree); $nexus->remove_block('history', 'intron'); $nexus->add_block($history); } # modify span block by adding BUGS parameters sub modify_span_block { my $nexus = shift; my %ancestor; $ancestor{program}='BUGS'; $ancestor{version}='unix-0.600'; my $like = $llike->[0]; my $a = $alpha->[0]; my $b = $beta->[0]; $ancestor{parameters}="loglikelihood=$like, alpha=$a, beta=$b, burnin=$burnin, monitor=$monitor"; my $spanblock = $nexus->get_block("span"); if (! $spanblock) { $spanblock = new Bio::NEXUS::SpanBlock('span'); $spanblock->{method}{ancestral_inference} = \%ancestor; $nexus->add_block($spanblock); }else { $spanblock->{method}{ancestor} = \%ancestor; } } ################# POD Documentation ################## __END__ =head1 NAME history.pl =head1 SYNOPSIS history.pl [options] Options: --help -h brief help message --man full documentation --debug output running info -n the number of converging initial runs -l running level 0 new run 1 old run with new number of generations 2 just retrieve data from existing files -b the number of burnin generations -m the number of monitoring generations -v print version information and quit =head1 Description create ancestral intron states based on an alignment and tree in a nexus file. The BUGS program will run many times with small number of generations to find those can converge (to global optimum) and then run one with a large number of generations (convergence) the ancestral states will be computed from the average value of those generations and write back to file in history block =head1 REQUIRES BUGS must be installed for running this program. =head1 VERSION $Revision: 1.7 $ =head1 AUTHOR Chengzhi Liang =cut ##################### End ##########################