####################################################################### # UnalignedBlock.pm ####################################################################### # Author: Thomas Hladish # $Id: UnalignedBlock.pm,v 1.24 2007/09/24 04:52:14 rvos Exp $ #################### START POD DOCUMENTATION ########################## =head1 NAME Bio::NEXUS::UnalignedBlock - Represents an UNALIGNED block of a NEXUS file =head1 SYNOPSIS if ( $type =~ /unaligned/i ) { $block_object = new Bio::NEXUS::UnalignedBlock($type, $block, $verbose); } =head1 DESCRIPTION This is a class representing an unaligned block in NEXUS file =head1 FEEDBACK All feedback (bugs, feature enhancements, etc.) is greatly appreciated. =head1 AUTHORS Thomas Hladish (tjhladish at yahoo) =head1 VERSION $Revision: 1.24 $ =head1 METHODS =cut package Bio::NEXUS::UnalignedBlock; use strict; #use Data::Dumper; # XXX this is not used, might as well not import it! #use Carp;# XXX this is not used, might as well not import it! use Bio::NEXUS::Functions; use Bio::NEXUS::TaxUnitSet; use Bio::NEXUS::Matrix; use Bio::NEXUS::Util::Exceptions; use vars qw(@ISA $VERSION $AUTOLOAD); use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION; @ISA = qw(Bio::NEXUS::Matrix); my $logger = Bio::NEXUS::Util::Logger->new(); =head2 new Title : new Usage : block_object = new Bio::NEXUS::UnalignedBlock($block_type, $commands, $verbose, $taxlabels); Function: Creates a new Bio::NEXUS::UnalignedBlock object Returns : Bio::NEXUS::UnalignedBlock object Args : type (string), the commands/comments to parse (array ref), and a verbose flag (0 or 1) =cut sub new { my ( $class, $type, $commands, $verbose, $taxa ) = @_; unless ($type) { ( $type = lc $class ) =~ s/Bio::NEXUS::(.+)Block/$1/i; } my $self = { type => $type }; bless $self, $class; $self->set_taxlabels($taxa); $self->{'otuset'} = new Bio::NEXUS::TaxUnitSet(); $self->_parse_block( $commands, $verbose ) if ( ( defined $commands ) and @$commands ); return $self; } =begin comment Title : _parse_format Usage : $format = $self->_parse_format($buffer); (private) Function: Extracts format values from line and stores values in a hash Returns : hash of formats Args : buffer (string) Methods : Separates formats by whitespace and creates hash containing key = format name and value = format value. =end comment =cut sub _parse_format { my ( $self, $string ) = @_; my %format = (); while ( $string =~ s/(\S+\s*=\s*[\"|\'][^\"\']+[\"|\'])// ) { my ( $name, $symbol ) = split /\s*=\s*/, $1; $format{ lc $name } = $symbol; } while ( $string =~ s/(\S+\s*=\s*\S+)// ) { my ( $name, $symbol ) = split /\s*=\s*/, $1; $format{ lc $name } = lc $symbol; } for my $other ( split /\s+/, $string ) { if ($other) { $format{ lc $other } = 1; } } return \%format; } =begin comment Title : _parse_matrix Usage : $self->_parse_matrix($buffer); (private) Function: Processes buffer containing matrix data Returns : none Args : buffer (string) Method : parse according to if name is quoted string or single word, if each state is single character or multi-character (use token keyword) =end comment =cut sub _parse_matrix { my ( $self, $matrix, $verbose ) = @_; my @taxa; my ( $name, $seq ) = (); # Build an array of hashrefs, where each hash has "name" and "seq" values # corresponding to the name and sequence found in each row of the matrix for my $row ( split /\n|\r/, $matrix ) { if ( $row =~ /^\s*$/ ) { next; } #for quoted taxon name if ( $row =~ /^\s*[\"|\']([^\"\']+)[\"|\']\s*([^\[]*)(\[.*\]\s*)*/ ) { ( $name, $seq ) = ( $1, $2 ); $name =~ s/\s+/_/g; if ( !$self->find_taxon($name) ) { Bio::NEXUS::Util::Exceptions::BadArgs->throw( 'error' => "Undefined Taxon: $name" ); } } else { # for one-word non-quoted taxon name $row =~ /^\s*(\S+)(\s*)([^\[]*)(\[.*\]\s*)*/; if ( $self->find_taxon($1) ) { $name = $1; $seq = $3; #print Dumper $seq; } else { print "taxon name $1 not found\n" if $verbose; $seq = $1 . $2 . $3; } } #print "> row: $row\n"; #print "> name: $name\n"; #print "> seq: $seq\n"; my $newtaxon = 1; for my $taxon (@taxa) { if ( $taxon->{'name'} eq $name ) { $taxon->{'seq'} .= ' ' . $seq; $newtaxon = 0; } } if ($newtaxon) { push @taxa, { name => $name, seq => $seq }; } } #print '> @taxa: '; # split each character my @otus; #print Dumper \@taxa; for my $taxon (@taxa) { $seq = $taxon->{'seq'}; $seq =~ s/^\s*(.*\S)\s*$/$1/; my @seq; while ( $seq =~ s/([^\(]+)|\(([^\(]+)\)// ) { # for +-(+ -)+- if ($1) { # for +- ### The following 4 commented lines of code are implemented in CharactersBlock.pm; they allow data tokens to be space-delimited. ### Unaligned blocks do not include the tokens or continuous formats according the Maddison et al. We ### may decide that we don't want to restrict unaligned data to DNA/RNA/AA the way Maddison et al have. # if ($self->get_format->{'tokens'} || lc $self->get_format->{'datatype'} eq 'continuous') { #LINE 1 # push @seq, split /\s+/, $1; #LINE 2 # } else { #LINE 3 push @seq, split /\s*/, $1; # } #LINE4 } elsif ($2) { push @seq, [ split /,\s*|\s+/, $2 ]; # for (+ -) } } push @otus, Bio::NEXUS::TaxUnit->new( $taxon->{'name'}, \@seq ); } my $otuset = $self->get_otuset(); $otuset->set_otus( \@otus ); $self->set_taxlabels( $otuset->get_otu_names() ); return \@otus; } =head2 find_taxon Title : find_taxon Usage : my $is_taxon_present = $self->find_taxon($taxon_name); Function: Finds whether the input taxon name is present in the taxon label. Returns : 0 (not present) or 1 (if present). Args : taxon label (as string) =cut sub find_taxon { my ( $self, $name ) = @_; if ( @{ $self->get_taxlabels || [] } == 0 ) { return 1; } for my $taxon ( @{ $self->get_taxlabels() } ) { if ( lc $taxon eq lc $name ) { return 1; } } return 0; } =head2 set_format Title : set_format Usage : $block->set_format(\%format); Function: set the format of the characters Returns : none Args : hash of format values =cut sub set_format { my ( $self, $format ) = @_; $self->{'format'} = $format; } =head2 get_format Title : get_format Usage : $block->get_format(); Function: Returns the format of the characters Returns : hash of format values Args : none =cut sub get_format { shift->{'format'} || {} } =head2 set_otuset Title : set_otuset Usage : $block->set_otuset($otuset); Function: Set the otus Returns : none Args : TaxUnitSet object =cut sub set_otuset { my ( $self, $otuset ) = @_; $self->{'otuset'} = $otuset; $self->set_taxlabels( $otuset->get_otu_names() ); } =head2 set_charstatelabels Title : set_charstatelabels Usage : $block->set_charstatelabels($labels); Function: Set the character names and states Returns : none Args : array of character states =cut sub set_charstatelabels { my ( $self, $charstatelabels ) = @_; $self->get_otuset->set_charstatelabels($charstatelabels); } =head2 get_charstatelabels Title : get_charstatelabels Usage : $set->get_charstatelabels(); Function: Returns an array of character states Returns : character states Args : none =cut sub get_charstatelabels { my ($self) = @_; return $self->get_otuset->get_charstatelabels(); } =head2 get_ntax Title : get_ntax Usage : $block->get_ntax(); Function: Returns the number of taxa of the block Returns : # taxa Args : none =cut sub get_ntax { my $self = shift; return $self->get_otuset()->get_ntax(); } =head2 rename_otus Title : rename_otus Usage : $block->rename_otus(\%translation); Function: Renames all the OTUs to something else Returns : none Args : hash containing translation =cut sub rename_otus { my ( $self, $translation ) = @_; $self->get_otuset()->rename_otus($translation); } =head2 add_otu_clone Title : add_otu_clone Usage : ... Function: ... Returns : ... Args : ... =cut sub add_otu_clone { my ( $self, $original_otu_name, $copy_otu_name ) = @_; # print "Warning: Bio::NEXUS::UnalignedBlock::add_otu_clone() method not fully implemented\n"; if ($self->find_taxon($copy_otu_name)) { print "Error: an OTU with that name [$copy_otu_name] already exists.\n"; } else { $self->add_taxlabel($copy_otu_name); } my @otu_set = (); if (defined $self->{'otuset'}->{'otus'}) { @otu_set = @{ $self->{'otuset'}->{'otus'} }; } foreach my $otu (@otu_set) { if (defined $otu) { if ($otu->get_name() eq $original_otu_name) { my $otu_clone = $otu->clone(); $otu_clone->set_name($copy_otu_name); $self->{'otuset'}->add_otu($otu_clone); } } } } =head2 equals Name : equals Usage : $block->equals($another); Function: compare if two Bio::NEXUS::UnalignedBlock objects are equal Returns : boolean Args : a Bio::NEXUS::CharactersBlock object =cut sub equals { my ( $self, $block ) = @_; if ( !Bio::NEXUS::Block::equals( $self, $block ) ) { return 0; } return $self->get_otuset()->equals( $block->get_otuset() ); } =begin comment Name : _write Usage : $block->_write(); Function: Writes NEXUS block containing unaligned data Returns : none Args : file name (string) =end comment =cut sub _write { my ( $self, $fh, $verbose ) = @_; $fh ||= \*STDOUT; Bio::NEXUS::Block::_write( $self, $fh ); $self->_write_matrix_info( $fh, $verbose ); $self->_write_matrix( $fh, $verbose ); print $fh "END;\n"; return; } =begin comment Name : _write_matrix_info Usage : $self->_write_matrix_info($file_handle,$verbose); Function: Writes UnalignedBlock info (all the block content except the matrix data) into the filehandle Returns : none Args : $file_handle and $verbose =end comment =cut sub _write_matrix_info { my ( $self, $fh, $verbose ) = @_; $fh ||= \*STDOUT; my $ntax = $self->get_ntax(); print $fh "\tDIMENSIONS ntax=$ntax;\n"; my %formats = %{ $self->get_format() }; if ( scalar keys %formats ) { print $fh "\tFORMAT "; if ( defined $formats{'datatype'} ) { print $fh " datatype=$formats{'datatype'}"; } for my $format ( keys %formats ) { if ( !$formats{$format} || $format =~ /datatype/i ) { next; } elsif ( $formats{$format} eq '1' ) { print $fh " $format"; } else { print $fh " $format=$formats{$format}"; } } print $fh ";\n"; } return; } =begin comment Name : _write_matrix Usage : $self->_write_matrix($file_handle,$verbose); Function: Writes UnalignedBlock matrix( The data stored in the matrix command) into the filehandle Returns : none Args : $file_handle and $verbose =end comment =cut sub _write_matrix { my ( $self, $fh, $verbose ) = @_; $fh ||= \*STDOUT; my @otus = @{ $self->get_otuset()->get_otus() }; print $fh "\tMATRIX\n"; for my $otu (@otus) { my $seq = $otu->get_seq_string(); print $fh "\t", $otu->get_name(), "\t", $seq, "\n"; } print $fh "\t;\n"; return; } sub AUTOLOAD { return if $AUTOLOAD =~ /DESTROY$/; my $package_name = __PACKAGE__ . '::'; # The following methods are deprecated and are temporarily supported # via a warning and a redirection my %synonym_for = ( "${package_name}set_charstates" => "${package_name}set_charstatelabels", "${package_name}get_charstates" => "${package_name}get_charstatelabels", ); if ( defined $synonym_for{$AUTOLOAD} ) { $logger->warn( "$AUTOLOAD() is deprecated; use $synonym_for{$AUTOLOAD}() instead" ); goto &{ $synonym_for{$AUTOLOAD} }; } else { Bio::NEXUS::Util::Exceptions::UnknownMethod->throw( 'error' => "ERROR: Unknown method $AUTOLOAD called" ); } } 1;