package InSilicoSpectro::InSilico::Peptide; # Perl object class for peptides # Copyright (C) 2005 Jacques Colinge and Alexandre Masselot # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA use strict; require Exporter; use Carp; use InSilicoSpectro::InSilico::MassCalculator; use InSilicoSpectro::InSilico::AASequence; use InSilicoSpectro::InSilico::CleavEnzyme; use InSilicoSpectro::Utils::io; our (@ISA, @EXPORT, @EXPORT_OK); @ISA = qw(Exporter); @EXPORT = qw(); @EXPORT_OK = (); our %visibleAttr = (sequence=>1, modif=>1, parentProtein=>1, start=>1, end=>1, readingFrame=>1, nTerm=>1, cTerm=>1, enzymatic=>1, enzyme=>1, aaBefore=>1, aaAfter=>1, nmc=>1, addProton=>1); our %aaList = split(//, 'A1C1D1E1F1G1H1I1K1L1M1N1P1Q1R1S1T1V1W1Y1'); return 1; =head1 NAME InSilicoSpectro::InSilico::Peptide - A class for digestion products =head1 SYNOPSIS use InSilicoSpectro::InSilico::Peptide; =head1 DESCRIPTION This class role is to model peptides obtained by enzymatic digestion (InSilicoSpectro::InSilico::CleavEnzyme) from a protein sequence (InSilicoSpectro::InSilico::AASequence). =head1 ATTRIBUTES =over 4 =item sequence =item modif The localized modifications for MS/MS computations or a list of modifications with number of occurence for PMF computations. See method getModifType. =item parentProtein A InSilicoSpectro::InSilico::AASequence object containing the protein sequence from which the peptide has been obtained. =item start Start position in the parent protein sequence. =item end End position in the parent protein sequence. =item readingFrame Useful in case the digested protein sequence came from DNA/RNA translation. =item nTerm Boolean, is the peptide an N-terminal peptide. =item cTerm Boolean, is the peptide a C-terminal peptide. =item enzyme The enzyme that yielded the peptide (useful for mass computations because of NTermGain and CTermGain for "exotic" enzymes); must be of class InSilicoSpectro::InSilico::CleavEnzyme. =item aaBefore Amino acid immediately before the peptide (at its N-terminus). =item aaAfter Amino acid immediately after the peptide (at its C-terminus). =item enzymatic Was the peptide cleaved by the enzyme at its both ends (value 'full'), at its N-terminus only (value 'half-N'), at its C-terminus only (value 'half-C'), or not enzymatic (value 'no'). This method is the accessor/modifier for attribute enzymatic. =item nmc Number of missed cleavages. =item addProton Set to 1 if you want to add the mass of one proton to the peptide mass when it is computed (for PMF). Otherwise either set to 0 or do not set. =back =head1 METHODS =head2 new(%h|$Peptide) Constructor. %h is a hash of attribute=>value pairs, $Peptide is a InSilicoSpectro::InSilico::Peptide object, from which the attributes are copied. =cut sub new { my $pkg = shift; my $pept = {}; my $class = ref($pkg) || $pkg; bless($pept, $class); if (ref($_[0]) && $_[0]->isa('InSilicoSpectro::InSilico::Peptide')){ %$pept = %{$_[0]}; undef($pept->{mass}); bless($pept, $class); } else{ bless($pept, $class); if (!ref($_[0])){ my %h = @_; foreach (keys(%h)){ $pept->$_($h{$_}) if ($visibleAttr{$_}); } } } return $pept; } # new =head1 Accessors and modifiers =head2 parentProtein([$val]) Returns the parent protein object (InSilicoSpectro::InSilico::AASequence). If $val is given, then the parent protein is set to $val. The end and start positions are left unchanged by this method, hence do not forget to ajust them if needed. The readingFrame is copied from the protein. =cut sub parentProtein { my ($this, $val) = @_; if (defined($val)){ if ($val->isa('InSilicoSpectro::InSilico::AASequence')){ $this->{parentProtein} = $val; $this->{readingFrame} = $val->{readingFrame}; undef($this->{mass}); } else{ croak("The object must be of class InSilicoSpectro::InSilico::AASequence [".ref($val)."]"); } } return $this->{parentProtein}; } # parentProtein =head2 sequence([$val]) Returns the peptide sequence as a string. If $val is given then the peptide sequence is set to $val. =cut sub sequence { my ($this, $val) = @_; if (defined($val)){ $val =~ s/\s//g; $this->{sequence} = $val; undef($this->{mass}); } if (defined($this->{sequence})){ return $this->{sequence}; } else{ # No sequence is only possible if there is a reference to a Sequence object if (!defined($this->{start}) || !defined($this->{end}) || !defined($this->{parentProtein})){ croak("All of parentProtein, start, end must be defined if no sequence is set"); } if ($this->{start} < $this->{end}){ # Direct orientation return substr($this->parentProtein->sequence, $this->{start}, $this->{end}-$this->{start}+1); } else{ # Reverse orientation return substr($this->parentProtein->sequence, $this->{end}, $this->{start}-$this->{end}+1); } } } # sequence =head2 enzyme([$val]) Enzyme accessor/modifier. =cut sub enzyme { my ($this, $val) = @_; if (defined($val)){ if (ref($val) && $val->isa('InSilicoSpectro::InSilico::CleavEnzyme')){ $this->{enzyme} = $val; undef($this->{mass}); } else{ croak("Illegal enzyme class [$val]"); } } return $this->{enzyme}; } # enzyme =head2 location($start, $end, readingFrame) Returns a vector with (start, end, readingFrame). If $start, $end, and readingFrame are given then it sets them. =cut sub location { my ($this, $start, $end, $readingFrame); return ($this->start($start), $this->end($end), $this->readingFrame($readingFrame)); } # location =head2 start([$val]) Start position accessor/modifier. =cut sub start { my ($this, $val) = @_; if (defined $val){ $val = int($val); if ($val >= 0){ $this->{start} = $val; undef($this->{mass}); } else{ croak("Negative position [$val]"); } } return $this->{start}; } # start =head2 end([$val]) End position accessor/modifier. =cut sub end { my ($this, $val) = @_; if (defined $val){ $val = int($val); if ($val >= 0){ $this->{end} = $val; undef($this->{mass}); } else{ croak("Negative position [$val]"); } } return $this->{end}; } # end =head2 readingFrame([$val]) readingFrame accessor/modifier: sets readingFrame attribute if $val is given, returns the readingFrame attribute. =cut sub readingFrame { my ($this, $val) = @_; if ($val){ $val = int($val); if (($val >= -3) && ($val <= 3)){ $this->{readingFrame} = $val; } else{ croak("Illegal reading frame [$val]"); } } return $this->{readingFrame}; } # readingFrame =head2 nTerm([$val]) NTerm position accessor/modifier. =cut sub nTerm { my ($this, $val) = @_; if (defined $val){ $val = int($val); if (($val == 0) || ($val == 1)){ $this->{nTerm} = $val; } else{ croak("Illegal value [$val]"); } } return $this->{nTerm}; } # nTerm =head2 cTerm([$val]) CTerm position accessor/modifier. =cut sub cTerm { my ($this, $val) = @_; if (defined $val){ $val = int($val); if (($val == 0) || ($val == 1)){ $this->{cTerm} = $val; } else{ croak("Illegal value [$val]"); } } return $this->{cTerm}; } # cTerm =head2 addProton([$val]) aaBefore position accessor/modifier. =cut sub addProton { my ($this, $val) = @_; if (defined $val){ $val = int($val); if (($val == 0) || ($val == 1)){ $this->{addProton} = $val; undef($this->{mass}); } else{ croak("Illegal value [$val]"); } } return $this->{addProton}; } # addProton =head2 aaBefore([$val]) NTerm position accessor/modifier. =cut sub aaBefore { my ($this, $val) = @_; if (defined $val){ if ($aaList{$val}){ $this->{aaBefore} = $val; } else{ warn("[WARNING] Illegal amino acid [$val]"); } } return $this->{aaBefore}; } # aaBefore =head2 aaAfter([$val]) aaAfter accessor/modifier. =cut sub aaAfter { my ($this, $val) = @_; if (defined $val){ if ($aaList{$val}){ $this->{aaAfter} = $val; } else{ warn("[WARNING] amino acid [$val]"); } } return $this->{aaAfter}; } # aaAfter =head2 enzymatic([$val]) Enzymatic status accessor/modifier. =cut sub enzymatic { my ($this, $val) = @_; if (defined $val){ if (($val eq 'full') || ($val eq 'half-N') || ($val eq 'half-C') || ($val eq 'no')){ $this->{enzymatic} = $val; } else{ croak("Illegal enzymatic status [$val]"); } } return $this->{enzymatic}; } # enzymatic =head2 nmc([$val]) Number of missed cleavages accessor/modifier. =cut sub nmc { my ($this, $val) = @_; if (defined $val){ $val = int($val); if ($val >= 0){ $this->{nmc} = $val; } else{ croak("Illegal number of missed cleavages [$val]"); } } return $this->{nmc}; } # nmc =head2 modif([$modif]) Modifications accessor/modifier: sets modifications if $modif, a reference to vector of modification names or a string is given (see Pheny::InSilico::MassCalculator::variablePeptide function for instance), returns a reference to a vector of modifications. This vector can be converted into a string for display purpose by the Pheny::InSilico::MassCalculator::modifToString function. =cut sub modif { my ($this, $modif) = @_; if (defined($modif)){ if (ref($modif) eq 'ARRAY'){ $this->{modif} = [@$modif]; undef($this->{mass}); $this->{modifType} = ((length($modif->[0]) > 0) && ($modif->[0] eq int($modif->[0]))) ? 'PMF' : 'MS/MS'; } elsif (!ref($modif)){ $this->{modif} = [split(/:/, $modif)]; undef($this->{mass}); $this->{modifType} = 'MS/MS'; } else{ croak("Invalid modification format [$modif]"); } } return $this->{modif}; } # modif =head2 modifAt($pos, [$modif]) Accessor/modifier for modification at position $pos. Sets the modification if $modif, a string, is provided. $pos = 0 is the N-terminal site, $pos = peptide length +1 is the C-terminal site, and 1 <= $pos <= peptide length correspond to amino acids. To remove a modification set it to an empty string ''. This method is for localized modifications in view of MS/MS and forces the modification type to be MS/MS (clears previous PMF modifications if any). =cut sub modifAt { my ($this, $pos, $modif) = @_; croak("No sequence defined") if ($this->getLength() == 0); $pos = int($pos); croak("Invalid position [$pos]") if (($pos < 0) || ($pos > $this->getLength()+1)); if ($modif){ if ($this->getModifType() eq 'PMF'){ $this->{modif} = []; } $this->{modif}[$pos+1] = $modif; $this->{modifType} = 'MS/MS'; undef($this->{mass}); } return $this->{modif}[$pos+1]; } # modifAt =head2 addPmfModif($num, $modifName) Adds a new pair (number of occurence, modification name) to the pmfModif list. This method is for PMF and it forces the modification type to be PMF (clears previous MS/MS modifications if any). =cut sub addPmfModif { my ($this, $num, $modifName) = @_; if ($num && $modifName){ if ($this->getModifType() eq 'MS/MS'){ $this->{modif} = []; } push(@{$this->{pmfModif}}, $num, $modifName); $this->{modifType} = 'PMF'; undef($this->{mass}); } } # addPmfModif =head2 clearModif Clears modifications. =cut sub clearModif { my $this = shift; undef($this->{modif}); undef($this->{mass}); undef($this->{modifType}); } # clearModif =head2 getModifType Returns the modification type ('PMF' or 'MS/MS') or undef if no type is defined, i.e. no modifications are set. =cut sub getModifType { my $this = shift; return $this->{modifType}; } # getModifType =head2 getMass Returns the peptide mass or undefined in case either the peptide sequence is not set or there are variable modifications. =cut sub getMass { my $this = shift; return $this->{mass} if (defined($this->{mass}) && ($this->{massType} == InSilicoSpectro::InSilico::MassCalculator::getMassType())); return undef if (!defined(my $sequence = $this->sequence())); my @list; if ($this->getModifType() eq 'PMF'){ my $modif = $this->modif(); for (my $i = 0; $i < @$modif; $i+=2){ for (my $j = 0; $j < $modif->[$i]; $j++){ push(@list, $modif->[$i+1]); } } } else{ foreach (@{$this->{modif}}){ if (length($_) > 0){ return undef if (index($_, '(*)') != -1); push(@list, $_); } } } my $termGainMass; if (my $enz = $this->enzyme()){ if ($enz->NTermGain() && !$this->nTerm() && (($this->enzymatic() eq 'full') || ($this->enzymatic() eq 'half-N'))){ # Not a N-term peptide and peptide N-terminus created by the enzyme $termGainMass += (InSilicoSpectro::InSilico::MassCalculator::massFromComposition($enz->NTermGain()))[InSilicoSpectro::InSilico::MassCalculator::getMassType()]; } else{ # Standard +H rule $termGainMass += InSilicoSpectro::InSilico::MassCalculator::getMass('el_H'); } if ($enz->CTermGain() && !$this->cTerm() && (($this->enzymatic() eq 'full') || ($this->enzymatic() eq 'half-C'))){ # Not a C-term peptide and peptide C-terminus created by the enzyme $termGainMass += (InSilicoSpectro::InSilico::MassCalculator::massFromComposition($enz->CTermGain()))[InSilicoSpectro::InSilico::MassCalculator::getMassType()]; } else{ # Standard +OH rule $termGainMass += InSilicoSpectro::InSilico::MassCalculator::getMass('el_H')+InSilicoSpectro::InSilico::MassCalculator::getMass('el_O'); } } my $mass = InSilicoSpectro::InSilico::MassCalculator::getPeptideMass(pept=>$sequence, modif=>\@list, termGain=>$termGainMass); $mass += InSilicoSpectro::InSilico::MassCalculator::getMass('el_H+') if ($this->addProton()); ; $this->{mass} = $mass; $this->{massType} = InSilicoSpectro::InSilico::MassCalculator::getMassType(); return $mass; } # getMass =head2 getMoZ(charge) =cut sub getMoZ { my $this = shift; my $charge=shift or croak "must provide a charge for Peptide->getMoZ"; my $m=$this->getMass(); $m-=InSilicoSpectro::InSilico::MassCalculator::getMass('el_H+') if ($this->addProton()); return $m/$charge+InSilicoSpectro::InSilico::MassCalculator::getMass('el_H+'); } =head2 getLength Returns peptide sequence length. =cut sub getLength { my $this = shift; return defined($this->{sequence}) ? length($this->{sequence}) : abs($this->{posEnd}-$this->{posStart})+1; } # getLength =head1 I/O =head2 toString Returns a string made of the amino acids before and after and the peptide sequence. Example 'K.ATURPLJK.S'. =head2 Overloaded "" operator Returns the string returned by toString. =head2 print Prints a complete peptide description. =cut use SelectSaver; use overload '""' => \&toString; sub toString { my $this = shift; return (length($this->aaBefore()) == 1 ? $this->aaBefore().'.' : '').$this->sequence().(length($this->aaAfter()) == 1 ? '.'.$this->aaAfter() : ''); } sub print { my ($this, $out) = @_; my $fdOut = defined($out) ? (new SelectSaver(InSilicoSpectro::Utils::io->getFD($out) || croak("cannot open [$out]: $!"))) : \*STDOUT; print $fdOut $this->toString(), "\n"; print $fdOut "".(InSilicoSpectro::InSilico::MassCalculator::modifToString($this->modif()))."\n" if (defined($this->modif())); print $fdOut $this->nmc(), " missed cleavage(s)\n" if (defined($this->nmc())); print $fdOut "Starts at ", $this->start(), "\n" if (defined($this->start())); print $fdOut "Ends at ", $this->end(), "\n" if (defined($this->end())); print $fdOut "C-terminal peptide\n" if ($this->cTerm()); print $fdOut "N-terminal peptide\n" if ($this->nTerm()); print $fdOut "Enzymatic: ", $this->enzymatic(), "\n" if ($this->enzymatic()); my $massType = InSilicoSpectro::InSilico::MassCalculator::getMassType(); InSilicoSpectro::InSilico::MassCalculator::setMassType(0); print $fdOut "Monoisotopic mass=", $this->getMass(); InSilicoSpectro::InSilico::MassCalculator::setMassType(1); print $fdOut " Da, average mass=", $this->getMass(), " Da\n"; InSilicoSpectro::InSilico::MassCalculator::setMassType($massType); } =head1 EXAMPLES See t/InSilico/testPeptide.pl and t/InSilico/testCalcDigestOOP.pl. =head1 AUTHORS Jacques Colinge, Upper Austria University of Applied Science at Hagenberg Alexandre Masselot, www.genebio.com =cut