################################################################# # Functions.pm ################################################################# # Author: Thomas Hladish # $Id: Functions.pm,v 1.15 2007/09/21 23:09:09 rvos Exp $ #################### START POD DOCUMENTATION ################## =head1 NAME Bio::NEXUS::Functions - Provides private utiliy functions for the module =head1 SYNOPSIS =head1 DESCRIPTION This package provides private functions that are not object-specific. =head1 COMMENTS =head1 FEEDBACK All feedback (bugs, feature enhancements, etc.) is greatly appreciated. =head1 AUTHORS Thomas Hladish (tjhladish at yahoo) =head1 VERSION $Revision: 1.15 $ =head1 METHODS =cut package Bio::NEXUS::Functions; use strict; #use Data::Dumper; # XXX this is not used, might as well not import it! #use Carp; # XXX this is not used, might as well not import it! use Bio::NEXUS::Util::Exceptions; use vars qw(@EXPORT @EXPORT_OK @ISA $VERSION); use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION; use Exporter (); @ISA = qw ( Exporter ); @EXPORT = qw( &_slurp &_parse_nexus_words &_ntsa &_stna &_quote_if_needed &_nexus_formatted &_is_comment &_is_number &_is_dec_number &_sci_to_dec &_unique &_nonunique &_share_elements &_fast_in_array &_in_array &_is_same_array ); ## READING & PARSING FUNCTIONS: =begin comment Name : _slurp Usage : $file_content = _slurp($filename); Function: reads an entire file into memory Returns : none Args : file name (string) =end comment =cut sub _slurp { my ($filename) = @_; open my $fh, '<', "$filename" || Bio::NEXUS::Util::Exceptions::FileError->throw( 'error' => "ERROR: Could not open filename <$filename> for input; $!" ); my $file_content = do { local ($/); <$fh> }; return $file_content; } =begin comment Title : _parse_nexus_words Usage : $parsed_words = _parse_nexus_words($buffer); Function: parse a string of text into "words" (as defined in the NEXUS standard) Returns : an array ref of "words" and punctuation marks. Single-quoted expressions are single "words". Double quotes are not supported. Args : text buffer Notes : this method has replaced _parse_string_tokens(), which did not conform to the NEXUS standard in all its quirky splendor (particularly with regard to punctuation) =end comment =cut sub _parse_nexus_words { my $buffer = shift; if ( not defined $buffer ) { Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' => '_parse_nexus_words() requires a text string argument (the text to be parsed)' ); } my @words; my ( $word, $in_quotes ) = ( q{}, 0 ); my @chars = split( //, $buffer ); my $comment_level = 0; # iterate through the characters for ( my $i = 0; $i < @chars; $i++ ) { my $char = $chars[$i]; my $next = $chars[ $i + 1 ]; if ($comment_level) { $comment_level++ if ( $char eq '[' ); $comment_level-- if ( $char eq ']' ); $word .= $char; } # If we see a quote elsif ( $char eq q{'} ) { # and we're already inside quotes . . . if ($in_quotes) { # check to see if this is an escaped (doubled single) quote, # (unless we're already at the end of the string to be parsed). if ( defined $next && $next eq q{'} ) { # If it is, append it to the current word; $word .= $char; } else { # otherwise, close off the quoted string $in_quotes--; # Replace spaces with underscores # (according to NEXUS, they're equivalent) $word =~ s/ /_/g; # Push it onto the word list, after # dealing with funny apostrophe business push @words, _ntsa($word); # And clean the slate $word = q{}; } } else { # If we weren't in quotes before, we are now $in_quotes++; } } elsif ($in_quotes) { # We're in a quoted string, so anything can be part of the word $word .= $char; } elsif ( $char eq '[' ) { $comment_level++; $word .= $char; } # If we see NEXUS-style punctuation elsif ( $char =~ /[\[\]\-(){}\/\\,;:=*"`+<>]/ ) { push @words, &_ntsa($word) # $word will be q{} if there was a preceding space; # otherwise, it will contain some string unless $word eq q{}; # then that counts as a word (we'll deal with pos/neg # numbers later in _rebuild_numbers() if that gets called) push @words, $char; $word = q{}; } # If we see whitespace elsif ( $char =~ /\s/ ) { # then we just finished a [probably] normal, space-delimited word push @words, &_ntsa($word) unless $word eq q{}; # although we don't want to keep pushing it # if there are multiple spaces, so we empty $word $word = q{}; } # If $word isn't quoted, and $char is neither punctuation nor whitespace else { $word .= $char; } } push @words, $word unless $word eq q{}; return \@words; } sub _rebuild_numbers { my $words = shift; my @new_words; # Don't bother checking whether the last word is a '+' or '-' for ( my $i = 0; $i < ( @$words - 1 ); $i++ ) { my $word = $words->[$i]; my $next = $words->[ $i + 1 ]; # There will always be a next # my $next_next = defined $words[$i +2] ? $words[$i+2] : q{}; # There might be a previous my $last = $i == 0 ? undef: $words->[ $i - 1 ]; if ( $word eq '-' || $word eq '+' ) { if ( my ( $num, $exp ) = $next =~ /^([\d.]+)(e)?/i ) { if ( _is_dec_number($num) ) { $word .= $next; $i++; if ($exp) { } } } } else { push @new_words, $word; } } return \@new_words; } =begin comment Title : _ntsa (nexus to standard apostrophe) Usage : $standard_word = $block->_ntsa($nexus_word); Function: change doubled single quotes to single single quotes (apostrophes) Returns : a standard english word (or phrase) Args : a nexus "word" Notes : See NEXUS definition of "word" for an explanation =end comment =cut sub _ntsa { my $nexus_word = shift; $nexus_word =~ s/[^']''[^']/'/g; return $nexus_word; } =begin comment Title : _stna (standard to nexus apostrophe) Usage : $nexus_word = $block->_stna($standard_word); Function: change single single quotes (apostrophes) to double single quotes Returns : a nexus "word" Args : a standard english word (or phrase) Notes : See NEXUS definition of "word" for an explanation =end comment =cut sub _stna { my $standard_word = shift; $standard_word =~ s/[^']'[^']/''/g; return $standard_word; } =begin comment Title : _quote_if_needed Usage : $string = Bio::NEXUS::Block::_quote_if_needed($string); Function: put single quotes around string if it contains spaces or NEXUS punctuation Returns : a string, in single quotes if necessary Args : a string =end comment =cut sub _quote_if_needed { my $nexus_word = shift; if ( $nexus_word =~ /[-\s(){}\[\]\/\\,;:=+*<>`'"]/ ) { return "'$nexus_word'"; } else { return $nexus_word; } } =begin comment Title : _nexus_formatted Usage : $string = Bio::NEXUS::Block::_nexus_formatted($string); Function: escape apostrophes and quote strings as needed for NEXUS output Returns : a string Args : a string =end comment =cut sub _nexus_formatted { my $nexus_word = shift; $nexus_word = _quote_if_needed( _stna($nexus_word) ); return $nexus_word; } =begin comment Name : _is_comment Usage : $boolean = _is_comment($string); Function: tests whether something looks like a comment Returns : boolean Args : string to test =end comment =cut sub _is_comment { my ($string) = @_; if ( $string =~ /^\[.*\]$/s ) { return 1 } else { return 0 } } =begin comment Title : _is_dec_number Usage : if ( _is_dec_number($num) ) { do_something() }; Function: verifies that a number is a normal decimal number (e.g. 3 or 9.41) Returns : 1 if $num is a number, otherwise 0 Args : a number =end comment =cut sub _is_dec_number { my ($number) = @_; return 0 unless defined $number && length $number; my $number_regex = qr/^[-+]? # positive or negative (?: \d+ # e.g., 523 | \d*[.]\d+ # 3.14 or .45 | \d+[.]\d* # 212. or 212.0 ) $/x; return 0 unless defined $number && $number =~ $number_regex; return 1; } =begin comment Title : _is_number Usage : if ( _is_number($num) ) { do_something() }; Function: verifies that a number is of reasonable form (such as 0.4 or 6.1e2.1) Returns : 1 if $num is a number, otherwise 0 Args : a number =end comment =cut sub _is_number { my ($number) = @_; return 0 unless defined $number && length $number; my ( $num, $exp ) = $number =~ /^([^e]+)(?:e([^e]+))?$/i; return 0 unless _is_dec_number($num); return _is_dec_number($exp) if defined $exp; return 1; } =begin comment Title : _sci_to_dec Usage : $decimal = _sci_to_dec($scientic_notation); Function: Changes scientific notation to decimal notation Returns : scalar (a number) Args : scalar (a number), possibly in scientific notation =end comment =cut sub _sci_to_dec { my ($sci_num) = @_; $sci_num =~ s/\s//g; return $sci_num if _is_dec_number($sci_num); my ( $num, $exp ) = $sci_num =~ /^ ([^e]+) e ([^e]+) $/ix; return 0 unless ( _is_dec_number($num) && _is_dec_number($exp) ); my $dec_num = $num * ( 10**$exp ); return $dec_num; } ## ARRAY FUNCTIONS: =begin comment Name : _any Usage : _any($filename); Function: reads an entire file into memory Returns : none Args : file name (string) =end comment =cut sub _unique { my (@array) = @_; my %seen = (); # from perl cookbook. fast, and preserves order my @unique = grep { !$seen{$_}++ } @array; return @unique; } sub _nonunique { my (@array) = @_; my %seen = (); my @nonunique = grep { $seen{$_}++ } @array; return @nonunique; } sub _share_elements { my ( $array1, $array2 ) = @_; for my $element1 (@$array1) { if ( &in_array( $array2, $element1 ) ) { return 1; } } return 0; } sub _fast_in_array { my ( $array, $element ) = @_; for (@$array) { if ( $element eq $_ ) { return 1; } } return 0; } sub _in_array { my ( $array, $test ) = @_; my $match = 0; for (@$array) { $match++ if $_ eq $test; } return $match; } sub _is_same_array { my ( $array, $test ) = @_; return 1 if $array eq $test; return 0 unless scalar @$array == scalar @$test; my $astr = join '', sort @$array; my $tstr = join '', sort @$test; return 1 if $astr eq $tstr; return 0; } 1;