package InSilicoSpectro::InSilico::AASequence;

# Perl object class for protein sequences

#Copyright (C) 2005 Alexandre Masselot and Jacques Colinge

#This library is free software; you can redistribute it and/or
#modify it under the terms of the GNU Lesser General Public
#License as published by the Free Software Foundation; either
#version 2.1 of the License, or (at your option) any later version.

#This library is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#Lesser General Public License for more details.

#You should have received a copy of the GNU Lesser General Public
#License along with this library; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use strict;
require Exporter;
use Carp;
use Scalar::Util qw(blessed);

use InSilicoSpectro::Utils::io;
use InSilicoSpectro::InSilico::Sequence;
use InSilicoSpectro::InSilico::MassCalculator;

our (@ISA, @EXPORT, @EXPORT_OK, $isBioPerl);
@ISA       = qw(InSilicoSpectro::InSilico::Sequence);
@EXPORT    = qw($qrValidAASeq);
@EXPORT_OK = ();

# Visible attributes controlled vocabulary: only these keys of the hash
# given to new() are forwarded to the accessor of the same name.
our %visibleAttr = (readingFrame=>1, modif=>1);

# Exported pattern matching a string made only of the listed residue letters.
our $qrValidAASeq=qr/^[ACDEFGHIJKLMNOPQRSTUVWY]*$/;

# True value required by require/use; only sub definitions and POD follow.
1;

=head1 NAME

InSilicoSpectro::InSilico::AASequence - Elementary protein sequence.

=head1 SYNOPSIS

use InSilicoSpectro::InSilico::AASequence;

=head1 DESCRIPTION

Inherits from InSilicoSpectro::InSilico::Sequence.

The AASequence class is intended to store protein sequence data.

=head1 ATTRIBUTES

=over 4

=item readingFrame

Set to -3, -2, -1, 1, 2, or 3 in case the sequence was obtained by
RNA/DNA translation.

=item modif

Reference to a vector of modification names, see the modif and modifAt
methods.

=back

=head1 METHODS

=head2 new([%h|$bpSeq|$Sequence|$AASequence])

Constructor. %h is a hash of attribute=>value pairs, $bpSeq is a
BioPerl Bio::Seq object, from which the attributes are copied,
$Sequence and $AASequence are InSilicoSpectro::InSilico::Sequence
and InSilicoSpectro::InSilico::AASequence respectively.

When an AASequence is given, the new object is a copy of it that owns
its own vector of modifications, i.e. changing the modifications of the
copy does not alter the original.

=cut
sub new {
    my $pkg   = shift;
    my $class = ref($pkg) || $pkg;
    my $src   = $_[0];
    my $seq;

    # blessed() guards the isa() calls: calling a method on an unblessed
    # reference (e.g. a plain hash ref) would die.
    if (blessed($src) && $src->isa('InSilicoSpectro::InSilico::AASequence')) {
        # Copy constructor: shallow copy of the attributes ...
        $seq = { %$src };
        # ... except the modification vector, which must not be shared
        # between the two objects (modifAt writes into it in place).
        $seq->{modif} = [ @{ $src->{modif} } ] if (ref($src->{modif}) eq 'ARRAY');
        bless($seq, $class);
    }
    elsif (blessed($src) && $src->isa('InSilicoSpectro::InSilico::Sequence')) {
        $seq = InSilicoSpectro::InSilico::Sequence->new($src);
        bless($seq, $class);
    }
    else {
        # Attribute hash, BioPerl object or nothing: the parent class
        # builds the object, then AASequence-specific attributes are set.
        $seq = InSilicoSpectro::InSilico::Sequence->new(@_);
        bless($seq, $class);
        if (!ref($src)) {
            my %h = @_;
            foreach my $attr (keys(%h)) {
                $seq->$attr($h{$attr}) if ($visibleAttr{$attr});
            }
        }
    }
    return $seq;

} # new


=head2 sequence([$val])

sequence accessor/modifier: sets sequence attribute if $val is given,
returns the sequence attribute.

=cut
sub sequence {
    my $this = shift;

    # The cached mass only becomes stale when a new sequence is given;
    # a plain read must not throw the cache away.
    undef($this->{mass}) if (@_);
    return $this->SUPER::sequence(@_);

} # sequence


=head2 readingFrame([$val])

readingFrame accessor/modifier: sets readingFrame attribute if $val is given,
returns the readingFrame attribute. Croaks if $val, once truncated to an
integer, is not one of -3, -2, -1, 1, 2, or 3.

=cut
sub readingFrame {
    my ($this, $val) = @_;

    if ($val) {
        $val = int($val);
        # 0 can still show up here after truncation (e.g. 0.5 or a
        # non-numeric string) and is not a valid reading frame.
        if (($val >= -3) && ($val <= 3) && ($val != 0)) {
            $this->{readingFrame} = $val;
        }
        else {
            croak("Illegal reading frame [$val]");
        }
    }
    return $this->{readingFrame};

} # readingFrame


=head2 modif([$modif])

Modifications accessor/modifier: sets modifications if $modif, a
reference to vector of modification names or a string of
colon-separated modification names, is given (see
InSilicoSpectro::InSilico::MassCalculator::variablePeptide function for
instance), returns a reference to a vector of modifications. This
vector can be converted into a string for display purpose by the
InSilicoSpectro::InSilico::MassCalculator::modifToString function.

Croaks if $modif is a reference to anything but an array.

=cut
sub modif {
    my ($this, $modif) = @_;

    if (defined($modif)) {
        if (ref($modif) eq 'ARRAY') {
            # Keep a private copy so later changes by the caller do not leak in
            $this->{modif} = [@$modif];
        }
        elsif (!ref($modif)) {
            $this->{modif} = [split(/:/, $modif)];
        }
        else {
            croak("Invalid modification format [$modif]");
        }
        # Modifications changed: the cached mass is no longer valid
        undef($this->{mass});
    }
    return $this->{modif};

} # modif


=head2 modifAt($pos, [$modif])

Accessor/modifier for modification at position $pos. Sets the
modification if $modif, a string, is provided. $pos = 0 is the
N-terminal site, $pos = protein length +1 is the C-terminal site,
and 1 <= $pos <= protein length correspond to amino acids.

To remove a modification set it to an empty string ''. When $modif is
not given the stored modification is returned unchanged.

Croaks if no sequence is defined or if $pos is out of range.

=cut
sub modifAt {
    my ($this, $pos, $modif) = @_;

    croak("No sequence defined") if ($this->getLength() == 0);
    $pos = int($pos);
    croak("Invalid position [$pos]") if (($pos < 0) || ($pos > $this->getLength()+1));

    # Test definedness, not truth: '' is the documented way to remove a
    # modification and must both be stored and invalidate the cached mass.
    if (defined($modif)) {
        $this->{modif}[$pos] = $modif;
        undef($this->{mass});
    }

    # Pure read: never write to the vector (nor create it) here
    return undef if (!defined($this->{modif}));
    return $this->{modif}[$pos];

} # modifAt


=head2 getMass

Returns the protein mass or undefined in case either the protein
sequence is not set or there are variable modifications, i.e.
modifications whose name contains '(*)'.

The mass is cached together with the mass type returned by
InSilicoSpectro::InSilico::MassCalculator::getMassType and recomputed
when the sequence, the modifications or the mass type change.

=cut
sub getMass {
    my $this = shift;

    # Cached value is reusable only if it was computed with the current mass type
    return $this->{mass} if (defined($this->{mass}) && ($this->{massType} == InSilicoSpectro::InSilico::MassCalculator::getMassType()));
    return undef if (!defined($this->{sequence}));

    # Collect the non-empty modifications; give up on variable ones
    my @list;
    foreach my $modif (@{$this->{modif}}) {
        next if (!defined($modif) || (length($modif) == 0));
        return undef if (index($modif, '(*)') != -1);
        push(@list, $modif);
    }

    my $mass = InSilicoSpectro::InSilico::MassCalculator::getPeptideMass(pept=>$this->{sequence}, modif=>\@list);
    $this->{mass}     = $mass;
    $this->{massType} = InSilicoSpectro::InSilico::MassCalculator::getMassType();
    return $mass;

} # getMass


=head1 EXAMPLES

See t/InSilico/testAASequence.pl.

=head1 AUTHORS

Alexandre Masselot, www.genebio.com

Jacques Colinge, Upper Austria University of Applied Science at Hagenberg

=cut