#!/usr/bin/perl -w ###################################################### # nextool.pl ###################################################### # Author: liangc, yangp # $Id: nextool.pl,v 1.40 2006/09/05 16:50:13 vivek Exp $ # this script provides a set of functions for manipulating NEXUS files # eg, select/exclude/rename OTUs, select character columns in OTUs # select/exclude subtree, select/reroot tree use strict; use Pod::Usage; use Carp; use Data::Dumper; $Data::Dumper::Maxdepth=3; use Bio::NEXUS; my $infile = shift @ARGV; unless ($infile) {die "Usage: nextool.pl [outfile] [command [options]]\n";} if ($infile eq "-v") { die "nextool.pl \$Revision: 1.40 $\n"; } if ($infile eq "-h" || $infile eq '--help') { pod2usage(-exitval => 0, -verbose => 1); } my $outfile = shift @ARGV; my $command = shift @ARGV; #if ($outfile eq "-") {$outfile = \*STDOUT;} if ($outfile =~ /(^-$|^STDOUT$)/) {$outfile = \*STDOUT;} if (! $outfile || ! $command || $command eq "rewrite") { my $nexus = new Bio::NEXUS($infile, 1); $nexus->write($outfile||"temp.nex", 1); exit; } my $nexus = new Bio::NEXUS($infile); if ($command eq "rename_otus") { $nexus = rename_otus($nexus, @ARGV); $nexus->write($outfile); }elsif ($command eq "select") { my $by = shift @ARGV; if ($by =~ /^block/i) { $nexus = selectbyblock($nexus, @ARGV); } elsif ($by =~ /^otu/i) { $nexus = selectbyotu($nexus, 1, @ARGV); } elsif ($by =~ /^subtree/i) { $nexus = selectbyinode($nexus, @ARGV); } elsif ($by =~ /^tree/i) { $nexus = selectbytree($nexus, @ARGV); } elsif ($by =~ /^column/i) { $nexus = selectbycolumn($nexus, 1, @ARGV); } elsif ($by =~ /^set/i) { $nexus = selectbysets($nexus, 1, @ARGV); } $nexus->write($outfile); }elsif ($command eq "exclude") { my $by = shift @ARGV; if ($by =~ /block/i) { $nexus = exclude_blocks($nexus, @ARGV); } elsif ($by =~ /otu/i) { $nexus = selectbyotu($nexus, 0, @ARGV); } elsif ($by =~ /subtree/i) { $nexus = exclude_subtree($nexus, @ARGV); } elsif ($by =~ /^column/i) { $nexus = selectbycolumn($nexus, 0, @ARGV); } elsif ($by =~ /^set/i) { $nexus = selectbysets($nexus, 0, @ARGV); } $nexus->write($outfile); }elsif ($command eq "reroot") { $nexus = reroottree($nexus, @ARGV); $nexus->write($outfile); } elsif ($command eq "addtree") { &addtree($nexus,@ARGV); $nexus->write($outfile); } elsif ($command =~ /^makesets?$/) { my $by = shift @ARGV; if ($by =~ /(?:file|^f$)/i) { $nexus = setsbyfile($nexus, @ARGV); } elsif ($by =~ /(?:inode|^i$)/i) { $nexus = setsbyinode($nexus, @ARGV); } elsif ($by =~ /(?:otu|^o$)/i) { $nexus = setsbyotus($nexus, @ARGV); } elsif ($by =~ /(?:clade|^c$)/i) { $nexus = setsbyclade($nexus, @ARGV); } elsif ($by =~ /(?:union|^u$)/i) { $nexus = setsbyunion($nexus, @ARGV); } elsif ($by =~ /(?:difference|^d$)/i) { $nexus = setsbydifference($nexus, @ARGV); } elsif ($by =~ /(?:charstate|^cs$)/i) { $nexus = setsbycharstate($nexus, @ARGV); } elsif ($by =~ /(?:cladeconsensus|^cc$)/i) { $nexus = setsbycladeconsensus($nexus, @ARGV); } else {die "Expecting command that looks like 'makesets ', where bymode is one of: file, inode, otu, clade, union, difference, charstate, cladeconsensus\n"}; $nexus->write($outfile); } elsif ($command =~ /^removesets?$/) { $nexus = removesets($nexus, @ARGV); $nexus->write($outfile); } elsif ($command =~ /^renamesets?$/) { $nexus = renamesets($nexus, @ARGV); $nexus->write($outfile); } elsif ($command =~ /^listsets?$/) { file_overwrite_warning($infile, $outfile, $command, 0); listsets($nexus, $outfile, @ARGV); } elsif ($command =~ /^listsetnames?$/) { file_overwrite_warning($infile, $outfile, $command, 0); listsetnames($nexus, $outfile); } else { die "unknown command: $command\n"; } exit; ######### SUBROUTINES ######################### # rename OTUs sub rename_otus { my ($nexus, $transfile) = @_; open(FILE, "<$transfile") or die "ERROR: Can\'t open $transfile\n"; my @lines = ; my $lines = "@lines"; my %translation = (split(/\s+/, $lines)); return $nexus->rename_otus(\%translation); } # select a subset of blocks sub selectbyblock { my ($nexus, @blocks) = @_; return $nexus->select_blocks(\@blocks); } # select subset of OTUs sub selectbyotu { my ($nexus, $mode, @args) = @_; die "need otu names" unless (@args); my @otus; if ($args[0] eq '-f') { # input from file my $file = $args[1]; open(FILE, $file) or carp "File $file not found\n"; my @lines = ; @otus = split /\s+/, "@lines"; close(FILE); }else { # input from command line separated by space my $list = join( " ", @args ); $list = unquote( $list ); $list =~ s/^\s+|\s+$//g; @otus = split( /[,\s]\s*/, $list ); } if ( $mode == 1 ) { # select mode return $nexus->select_otus(\@otus); } else { # exclude mode return $nexus->exclude_otus(\@otus); } } # select a tree sub selectbytree { my ($nexus, $treename) = @_; ($treename) or die "ERROR: Need to specify a tree to be selected\n"; return $nexus->select_tree($treename); } # select a subtree sub selectbyinode { my ($nexus, $nodename, $treename) = @_; $nodename or die "ERROR: Need to specify an internal node for subtree\n"; return $nexus->select_subtree($nodename, $treename); } # use $command=1 for select and $command=0 for exclude # arguments (column numbers) can be of form: # 1) 1-3 4 5 6-10 or 1-3, 4 5, 6-10 # 2) -f contains numbers in format as examplified in 1) sub selectbycolumn { my ($nexus, $command, @args) = @_; my $args = "@args"; $args =~ s/title\s*=\s*(\w+)//i; my $title = $1; # my $block = $nexus->get_block("characters", $title); die "need column numbers" unless $args; my $columns; if ($args =~ /-f (\S+)/) { # input from file my $file = $1; $columns = do{ local(@ARGV, $/) = $file; <>}; $columns =~ s/\n/ /g; }else { # input from command line separated by comma or space $columns = $args; } my @columns = @{ &parse_number($columns) }; if ($command) { return $nexus->select_chars(\@columns, $title); }else { return $nexus->exclude_chars(\@columns, $title); } } sub selectbysets { my ($nexus, $mode, @setnames) = @_; my @otus; die "Provide set names as arguments\n" unless (@setnames); for my $setname (@setnames) { push(@otus, @{ $nexus->get_block('sets')->get_taxset($setname) } ); } if ( $mode == 1 ) { # "select" mode return $nexus->select_otus(\@otus); } else { # "exclude" mode return $nexus->exclude_otus(\@otus); } } # parse numbers in format "1-3, 4 6 8-10" sub parse_number { my $s = unquote( shift ); if (! $s =~ /^\s*(\d+(-\d+)?)([,\s]\s*\d+(-\d+)?)*\s*$/ ) { die "Invalid number format. Use 1 or 1-3 or 1, 3, 5-8 or 1 3 5 6-10.\n"; } $s =~ s/^\s+|\s+$//g; $s =~ s/,?\s+/,/g; # use ',' as separator my @cols = split(/,/, $s); my @arr; foreach my $item (@cols) { if ($item =~ /-/) { # eg 1-3 $item =~ /([0-9]+)\s*-\s*([0-9]+)/; for (my $i = $1; $i <= $2; $i++) { push ( @arr, $i-1 ); } } elsif ($item =~ /^\d+$/) { # eg 4 push ( @arr, $item-1 ); } elsif ($item) { die "non-number was used for column number\n"; } } @arr = sort {$a<=>$b} @arr; return \@arr; } sub unquote { my $string = shift; $string =~ s/^ *'(.*)' *$/$1/; $string =~ s/^ *"(.*)" *$/$1/; return( $string ); } sub exclude_blocks { my ($nexus, @blocks) = @_; return $nexus->exclude_blocks(\@blocks); } sub exclude_subtree { my ($nexus, $subtree) = @_; return $nexus->exclude_subtree($subtree); } sub reroottree { my ($nexus, $outgroup, $root_position, $treename) = @_; if ( $nexus->get_block('trees')->get_tree('$root_position') ) { #in case no root position is supplied, but a tree name is ($root_position, $treename) = (undef, $root_position); } return $nexus->reroot($outgroup, $root_position, $treename); } sub addtree { my ($nexus,$treeFile,$treeName) = @_; if ( !$treeName ) { ($treeName = (split("/",$treeFile))[-1]) =~ s/(.*)\..*/$1/; } if ( -f $treeFile ) { open(TF,"<$treeFile") || warn("ERROR: could not open $treeFile. No tree data will appear in NEXUS file."); my ($treeString); while() { s/\s*//go; $treeString .= $_; } close(TF); $treeString =~ s/(.*);$/$1/; #remove semicolon from end of $treeString, if there is one. #$treeString must be formatted like this in order to work w/ Bio::NEXUS::TreesBlock::new - # tree $treeName = $treeString #the following manipulations ensure we end up w/ such a string to pass to create_block $treeString =~ s/^tree(.*)$/$1/; unless ( $treeString =~ m/^$treeName=/ ) { $treeString = $treeName . " = " . $treeString; } $treeString = "tree " . $treeString; print STDERR "tree string:\n$treeString\n"; $nexus->add_block( $nexus->create_block('trees',$treeString, 1) ); } else { die("Tree file: \"$treeFile\" not found or could not be read"); } } sub setsbyfile { my ($nexus, @setfiles) = @_; my $sets; for my $setfile (@setfiles) { open (SF, "<$setfile") || die ("Could not open set file $setfile.\n\n"); while () { chomp; push (@{$$sets{$setfile}}, $_); } close (SF); } &add_or_create_set ($nexus, $sets); return $nexus; } sub setsbyinode { my ($nexus, @inodenames) = @_; my $sets; for my $inodename (@inodenames) { unless ($nexus->get_block('trees')->get_tree()->find($inodename)) { die "User-provided inode <$inodename> is not the name of an inode in this NEXUS file\nCommand aborted\n\n"; } my $subtree = $nexus->select_subtree($inodename); my $otus = $subtree->get_otus(); $$sets{$inodename} = $otus; } &add_or_create_set ($nexus, $sets); return $nexus; } sub setsbyclade { my ($nexus, @clade_elements) = @_; my ($sets, $treename);# $treename is currently a dummy variable, but we should implement it unless (validate_otus($nexus, [@clade_elements])) {die "User-provided OTUs failed validation against TAXA Block\n\n"} while (scalar(@clade_elements) > 0) { my ($otu1, $otu2) = splice(@clade_elements, 0, 2); unless (defined $otu2) {die "You have supplied an odd number of OTU's. This mode requires pairs.\n\n"} $otu1 = $nexus->get_block('trees')->get_tree($treename)->find($otu1); $otu2 = $nexus->get_block('trees')->get_tree($treename)->find($otu2); my $inode_name = $otu1 -> mrca($otu2, $treename) -> name(); my $otus = $nexus->select_subtree($inode_name)->get_otus(); $$sets{$inode_name} = $otus; } &add_or_create_set ($nexus, $sets); return $nexus; } sub setsbyotus { my ($nexus, @otuargs) = @_; my $otuargs = join(" ", @otuargs); my $sets; my @setsandelements = $otuargs =~ /\s*([^=]+)\s*=\s*\[\s*([^\]]+)\s*\]/g; while (@setsandelements) { $$sets{shift(@setsandelements)} = [split(/\s/, splice(@setsandelements, 1, 1))]; } my $validation = 1; for my $users_otus (values %$sets) { $validation = validate_otus($nexus, $users_otus); } unless ($validation == 1) {die "User-defined sets failed validation against TAXA Block\n\n"} &add_or_create_set ($nexus, $sets); return $nexus; } sub setsbyunion { my ($nexus, @setargs) = @_; my $setargs = join(" ", @setargs); my $sets = $nexus -> get_block('sets') -> get_taxsets(); my @setsandelements = $setargs =~ /\s*(\b[^=]+\b)\s*=\s*\[\s*([^\]]+)\s*\]/g; while (@setsandelements) { my ($newsetname, @subsets) = (shift(@setsandelements), split(/(?:\s+|\s*\+\s*)/, shift(@setsandelements))); my @otus; for my $subset (@subsets) { push(@otus, @{$nexus -> get_block('sets') -> get_taxset($subset)}); } $$sets{$newsetname} = [@otus]; } &add_or_create_set ($nexus, $sets); return $nexus; } sub setsbydifference { my ($nexus, @setargs) = @_; my $setargs = join(" ", @setargs); my $sets = $nexus -> get_block('sets') -> get_taxsets(); # print Dumper ($sets); @setargs = $setargs =~ /\s*([^=\s]+)\s*=\s*\[\s*([^-\s\]]+)\s*-\s*([^-\s\]]+)\s*\]/g; my (@difference_set_names, @minuend_set_names, @subtrahend_set_names); while (@setargs) { push(@difference_set_names, shift (@setargs)); push(@minuend_set_names, shift (@setargs)); push(@subtrahend_set_names, shift (@setargs)); } for (my $i = 0; $i < @difference_set_names; $i++) { $$sets{$difference_set_names[$i]} = [@{$$sets{$minuend_set_names[$i]}}]; for (my $j = 0; $j < @{$$sets{ $difference_set_names[$i] }}; $j++ ) { for my $subtrahendOTU ( @{$$sets{ $subtrahend_set_names[$i] }} ) { if ( ${$$sets{ $difference_set_names[$i] }}[$j] eq $subtrahendOTU) { splice ( @{$$sets{ $difference_set_names[$i] }}, $j, 1); } } } } &add_or_create_set ($nexus, $sets); return $nexus; } sub setsbycharstate { my ($nexus, @char_args) = @_; my ($proposed_sets, $title) = &parse_charstate_args(@char_args); my ($sets, @column_labels); my $block = $nexus->get_block("characters", $title); my $character_labels = $block->get_charlabels; my $seq_length = $block->get_nchar; my %char_matrix = %{$block->get_otuset->get_otu_sequences}; if ($character_labels && @$character_labels) { @column_labels = @$character_labels; } elsif ($seq_length) { for (1 .. $seq_length) {push @column_labels, $_;} } my %column_indices; for (my $i = 0; $i < @column_labels; $i++) { $column_indices{$column_labels[$i]} = $i; } foreach my $proposed_setname (keys %{$proposed_sets}){ while (@{$$proposed_sets{$proposed_setname}} > 0 ) { my ($column_label, $query_value) = splice(@{$$proposed_sets{$proposed_setname}}, 0, 2); foreach my $otu (keys %char_matrix) { my $locus_value = substr($char_matrix{$otu},$column_indices{$column_label},1); if ($query_value =~ m/$locus_value/) { push (@{$$sets{$proposed_setname}}, $otu); } } } } &add_or_create_set ($nexus, $sets); return $nexus; } sub parse_charstate_args { my $char_args = join (" ", @_); $char_args =~ m/title\s*=\s*(\w+)/i; my $title = $1; # store the captured title name, if one was provided my $num_title_matches = $char_args =~ s/title\s*=\s*\w+//g; # make sure the user only referred to one title (to prevent ambiguity) if ($num_title_matches > 1 ) {die ("Too many 'title' arguments--please reference only one Characters Block\n\n")} # the following line matches each set as defined by the user and stores it in the @proposed_sets array # basically, it looks for bracket pairs that look like foo[bar], # which may or may not be preceded by a set name declaration ('set_name = ') my @proposed_sets = $char_args =~ m/(?:[-\w]+\s*=\s*)?\s*[-\w]+\s*\[[^\]]+\]\s*/g; my %proposed_sets; for my $proposed_set (@proposed_sets) { # for each of the matches made above my $set_name; if ($proposed_set =~ m/^\s*([^\s\[\]]+)\s*=/) { # if a name declaration was matched $set_name = $1; # use that as the set name $proposed_set =~ s/^\s*[^\s\[\]]+\s*=\s*//; # then, get rid of that portion of the array element } else { # otherwise, make up a unique set name by joining the column label and locus value(s) $set_name = join("", ($proposed_set =~ m/[^"\s\[\]]/g)); } my ($column_label, $locus_value); while ($proposed_set =~ m/[^"\s]/) { # as long as $proposed_set contains more than spaces and quotes ($column_label) = $proposed_set =~ m/[^"\s\[]+/; # match the column label $proposed_set =~ m/\[([^\]]+)\]/; # and match the locus value(s) $locus_value = $1; $locus_value =~ s/[\s,]//g; # get rid of any spaces or commas the user may have separated the locus values with push (@{$proposed_sets{$set_name}}, $column_label, $locus_value); # push the column label and locus value(s) onto this sets array in the set hash $proposed_set =~ s/^[^\]]*\]//; # destroy the evidence } } return (\%proposed_sets, $title); } sub setsbycladeconsensus { my ($nexus, @args) = @_; my $consensus_sets = {}; my $treename; my $args = join(' ', @args); $args =~ m/title\s*=\s*(\w+)/i; my $title = $1; if ($title) {$args =~ s/\s*title\s*=\s*\w+\s*//i;} if ($args =~ /(\w+)/) {$treename = $1;} my $char_block = $nexus->get_block("characters", $title); my %otunames_sequences = %{$char_block->get_otuset->get_otu_sequences}; my $tree = $nexus->get_block('trees')->get_tree($treename)->clone(); my $rootnode = $tree->get_rootnode(); &set_consensus_seqs($rootnode, \%otunames_sequences); ($rootnode, $consensus_sets) = &simplify_tree($rootnode, $consensus_sets); $tree->set_name("nonredundant" . $tree->name()); $nexus->get_block('trees')->add_tree($tree); &add_or_create_set ($nexus, $consensus_sets); return $nexus; } sub set_consensus_seqs { my ($node, $otus_seqs) = @_; my @children = @{ $node->children() }; for my $child (@children) { if ( $child->is_otu() ) { $child->set_seq(${$otus_seqs}{$child->name()}); if ( ! $node->get_seq() || ( $node->get_seq() eq $child->get_seq() ) ) { $node->set_seq($child->get_seq()); } else { $node->set_seq('NO_CONSENSUS'); } } else { &set_consensus_seqs($child, $otus_seqs); if ( ! $node->get_seq() || ( $node->get_seq() eq $child->get_seq() ) ) { $node->set_seq($child->get_seq()); } else { $node->set_seq('NO_CONSENSUS'); } } } } sub simplify_tree { my ($node, $consensus_sets) = @_; my $setname; if ($node->get_seq() ne 'NO_CONSENSUS' && ! $node->is_otu() ) { my @otu_descendents = @{ $node->get_otus() }; $setname = $otu_descendents[0]->name() . "_GROUP"; ${ $consensus_sets }{$setname} = []; for my $descendent (@otu_descendents) { push(@{${$consensus_sets }{$setname}}, $descendent->name()); } my $newlength = $node->distance($otu_descendents[0]) + $node->length(); my $parent = $node->get_parent(); my $siblings = $node->get_siblings(); $parent->set_children($siblings); $parent->adopt($otu_descendents[0],0); $otu_descendents[0]->set_length($newlength); } else { for my $child (@{ $node->children() }) { simplify_tree($child, $consensus_sets); } } return ($node, $consensus_sets); } sub validate_otus { #Verifies that user-specified OTU names are taxlabels in the TAXA block my ($nexus, $users_otus) = @_; my $validation = 1; for my $users_otu (@$users_otus) { unless (defined ($nexus->get_block('taxa')->is_taxon($users_otu, 1))) { $validation = 0; } } return $validation; } sub add_or_create_set { my ($nexus, $sets) = @_; my $setsblock; if ($nexus -> get_block('Sets')) { $nexus -> get_block('Sets') -> add_taxsets($sets); } else { $setsblock = Bio::NEXUS::SetsBlock -> new('Sets'); $setsblock -> set_taxsets($sets); $nexus -> add_block($setsblock); } return $nexus; } sub removesets { my ($nexus, @usersets) = @_; my $failure = 0; my $sets_to_remove; my $setsblock = get_setsblock_if_setsblock($nexus); my $taxsets = $setsblock->get_taxsets(); unless (scalar(keys %$taxsets) > 0) {die "The SETS Block is already empty\n\n";} if ($usersets[0] =~ /^-p$/i) { shift @usersets; for my $userset (@usersets) { my $match = 0; TAXSET: for my $taxset (keys %$taxsets) { if ($taxset =~ /^$userset$/) { push(@$sets_to_remove, $taxset); $match = 1; next TAXSET; } } if ($match == 0) { warn "<$userset> does not match any sets in this NEXUS file\n"; $failure = 1; } } } else { for (my $i=0; $i < scalar(@usersets); $i++) { if ($$taxsets{$usersets[$i]}) { push (@$sets_to_remove, $usersets[$i]); } else { warn "<$usersets[$i]> is not the name of a set in this NEXUS file.\n"; $failure = 1; } } } die "Command aborted\n\n" if $failure == 1; $nexus->get_block('sets')->delete_taxsets(@$sets_to_remove); return $nexus; } sub renamesets { my ($nexus, @old_and_new) = @_; my $setsblock = get_setsblock_if_setsblock($nexus); $setsblock->rename_taxsets(@old_and_new); return $nexus; } sub listsets { my ($nexus, $outfile, @sets_to_list) = @_; my $setsblock = get_setsblock_if_setsblock($nexus); if (@sets_to_list > 0) { my $fh = open_filehandle($outfile); for (@sets_to_list) { my @otus = @{ $setsblock->get_taxset($_) }; if (@otus == 0) { warn("$_ = [] Possible error: Set is empty or does not exist\n\n"); } elsif (@otus > 0) { print $fh "$_ = [@otus]\n\n"; } } } else { $setsblock->print_all_taxsets($outfile); } } sub listsetnames { my ($nexus, $outfile) = @_; my $setsblock = get_setsblock_if_setsblock($nexus); my @setnames = @{ $setsblock->get_taxset_names() }; my $fh = open_filehandle($outfile); print $fh "@setnames\n"; } sub open_filehandle { my ($fh) = @_; if ($fh eq "-" || $fh eq \*STDOUT) {return \*STDOUT} else {open(FH, ">$fh") || die "Could not open $outfile for writing\n\n"} return \*FH; } sub get_setsblock_if_setsblock { my ($nexus) = @_; my $setsblock; unless ($setsblock = $nexus->get_block('sets')){die "This NEXUS file does not contain a SETS Block\n\n"} return $setsblock; } sub file_overwrite_warning { my ($infile, $outfile, $command, $silentmode) = @_; if ($silentmode == 1) {return 1} if ($infile eq $outfile) { warn("Do you really want to overwrite NEXUS file $infile with the $command output?\n"); if ( =~ /y/) { return 1; } else { die("Command aborted\n\n")} } else { return 1;} } ################# POD Documentation ################## __END__ =head1 NAME nextool.pl - Command-line NEXUS file manipulation tool =head1 DESCRIPTION Command-line Bio::NEXUS manipulation program =head1 SYNOPSIS nextool.pl [output_file] [COMMAND [arguments]] if outfile is not specified, just read input file and output to temp.nex Commands: rewrite rename_otus reroot [tree_name] select blocks select OTU or <-f filename> select tree select subtree [tree_name] select column or <-f filename> exclude blocks exclude OTU exclude subtree [treename] exclude column or <-f filename> makesets byfile [file2 file3 ...] makesets byinode [inode2 inode3 ...] makesets byclade [OTU3 OTU4 ...] Square brackets are required syntax in this command: makesets byotus =[] =[] ... Square brackets are required in this command: makesets byunion =[] =[] ... Square brackets are required in this command: makesets bydifference =[] =[] ... Square brackets around 'state' argument are required in this command: makesets bycharstate [title=char_block_title] [set1=]"[]" [set2=...] makesets bycladeconsensus [title=char_block_title] removesets listsets [set1 set2 ...] listsetnames renamesets oldname1 newname1 [oldname2 newname2 ...] =head1 COMMANDS =head2 rewrite Writes the contents of to . This is used to standardize the format of the file. this is the default program action if is not specified, the output file will be temp.nex =head2 rename_otus Renames OTUs in taxa, characters and trees blocks according to translation_file. Each line of translation_file contains the old name, separated by whitespace, then the new name. =head2 reroot [tree_name] reroot the tree of tree_name (optional-- the first tree in trees block if not specified) with outgroup_node_name as the new outgroup =head2 select blocks Select block given in list =head2 select OTU or <-f filename> Selects OTUs given in list. Changes taxa block, characters block and trees block. =head2 select tree Selects one tree given the tree name. Changes taxa/characters blocks to match. =head2 select subtree [tree_name] Selects subtree given the tree name and the internal node number. Changes taxa/characters blocks to match. =head2 select column or <-f filename> Selects a number of columns from character lists to get a new set of character lists (for a new Bio::NEXUS file). eg, "1-3, 5 8, 10-15" (comma or space can be used to separate numbers) =head2 exclude blocks Remove blocks from file =head2 exclude OTU Remove OTUs from file =head2 exclude subtree [treename] Remove subtree rooted with 'inode' in 'treename' or the first tree if 'treename' is not specified =head2 exclude column or <-f filename> Remove a number of columns from character lists to get a new set of character lists (for a new Bio::NEXUS file). eg, "1-3, 5 8, 10-15" (comma or space can be used to separate numbers) =head2 makesets byfile [file2 file3 ...] Add sets based on OTU's listed in simple text files. Sets take names of files; OTU's should be newline-delimited in the text files. =head2 makesets byinode [inode2 inode3 ...] Add sets based on ancestral internal nodes and their children =head2 makesets byclade [OTU3 OTU4 ...] Add sets by finding the children of the most recent common ancestor (mrca) of OTU1 and OTU2 pairs =head2 makesets byotus =[] =[] ... Add sets by specifying setnames and OTU's each set is to contain. Syntax is setname=[] =head2 makesets byunion =[] =[] ... Add sets by specifying setnames and which existing sets contain the OTUs the new set will comprise. Syntax is setname=[] =head2 makesets bydifference =[] =[] ... Add sets by specifying setnames and which existing sets contain the OTUs that will be used in defining the new sets. =head2 makesets bycharstate [title=char_block_title] [set1=]"[]" [set2=...] Add sets by specifying setnames and which existing sets contain the OTUs that will be used in defining the new sets. =head2 makesets bycladeconsensus [title=char_block_title] Add sets by finding clades that have a consensus sequence at all loci, and creating one set for each group of "otu synonyms" =head2 removesets [-p] set1 set2 ... Delete sets from NEXUS file. Will empty SETS Block but not remove it from file. '-p' switch allows you to specify regular expression patterns ('set\d*' will delete all sets of the form set.) QUOTATION MARKS AROUND PATTERNS IS STRONGLY RECOMMENDED. =head2 listsets Print sets (all sets by default, otherwise those provided by user) to output file, or standard out of STDOUT or - is used as output filename =head2 listsetnames Print setnames to output file, or standard out if STDOUT or - is used as output filename =head2 renamesets oldname1 newname1 [oldname2 newname2 ...] Rename sets by space-delimited pairs =head1 DESCRIPTION B provides several services in the manipulation of NEXUS files (selecting specific OTUs or tree nodes, combining files, renaming OTUs, etc. =head1 VERSION $Revision: 1.40 $ =head1 REQUIRES Bio::NEXUS =head1 AUTHOR Chengzhi Liang Peter Yang Tom Hladish =cut ##################### End ##########################