use strict;

package InSilicoSpectro::Databanks::DBEntry;
require Exporter;
use Carp;

=head1 NAME

InSilicoSpectro::Databanks::DBEntry - fasta databank entry + header's annotated info

=head1 SYNOPSIS

=head1 DESCRIPTION

Describes a databank entry (header + sequence)

=head1 FUNCTIONS

=head1 METHODS

=head3 my $dbe=InSilicoSpectro::Databanks::DBEntry->new([\%h]);

=head2 Accessors/Setters

=head3 $dbe->AC([val])

get/set accession code

=head3 $dbe->ID([val])

get/set accession ID

=head3 $dbe->dbName([val])

[GS]et databank name

=head3 $dbe->taxoID([val])

[GS]et taxonomy id (typically NCBI taxid)

=head3 $dbe->description([val])

=head3 $dbe->sequence([val])

Get/Set AC, ID, ...

=head3 $dbe->seqType(["AA"|"DNA"])

Get or set the sequence type

=head3 $dbe->annotatedModRes(string|[[pos1, mod1], [pos2, mod2]]);

get or set annotated PTM from a string (e.g. (1|ACET_nterm)(2|ACET_nterm)(185|PHOS)) or an array. All previously set PTM are removed

=head3 $dbe->add_annotatedModRes(pos, modkey)

add an annotated PTM

=head3 $dbe->clear_annotatedModRes()

remove all annotated modres

=head3 $dbe->variants(string|[[pos1, seq1a, seq1b], [pos2, seq2a, seq2b]]);

get or set variants from a string (e.g. "(9|F|Y)(30|D|N)(41|A|G)(43|Q|R)") or an array. All previously set variants are removed

=head3 $dbe->add_variant(pos, seqa, seqb)

add a VARIANT, replacing seqa by seqb (often just one aminoacid) at pos

=head3 $dbe->clear_variants()

remove all Variants

=head2 I/O

=head3 $dbe->readFasta($fastacontent);

read info from fasta contents (first line with '>' and info; the remainder is the sequence).

=head3 $dbe->printFasta();

print the entry under fasta format (use SelectSaver or whatever select method to redirect towards a file descriptor);

=head3 $dbe->printHtml();

Print the entry under a html format.

=head1 EXAMPLES

=head1 SEE ALSO

=head1 COPYRIGHT

Copyright (C) 2004-2005  Geneva Bioinformatics www.genebio.com

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

=head1 AUTHORS

Alexandre Masselot, www.genebio.com

=cut

use File::Basename;

{
  use Object::InsideOut;

  # Per-object fields. Each one gets a combined get/set accessor of the given
  # name; accessors starting with '__' are private to this class and are
  # wrapped by the public methods defined further down.
  my @AC :Field(Accessor => 'AC' );
  my @ID :Field(Accessor => 'ID' );
  my @ACorig :Field(Accessor => 'ACorig' );
  my @dbName :Field(Accessor => 'dbName' );
  my @taxoID :Field(Accessor => 'taxoID' );
  my @__sequence :Field(Accessor => '__sequence', permission=>'Private');
  my @description :Field(Accessor => 'description' );
  my @ncbiTaxid :Field(Accessor => 'ncbiTaxid' );
  my @annotatedModRes :Field(Accessor => '__annotatedModRes', Type=>'List', permission=>'Private');
  my @variants :Field(Accessor => '__variants', Type=>'List', permission=>'Private');
  my @PI :Field(Accessor => 'PI' );
  my @mass :Field(Accessor => 'mass' );

  # Constructor arguments: AC, or COPY (matched case-insensitively) holding
  # another entry to duplicate.
  my %init_args :InitArgs = ( AC=>'AC', COPY=>qr/^copy$/i, );

  # Object initialiser.
  #   $self : the object being built
  #   $h    : hash ref of the constructor arguments
  # Both lists (annotated modres, variants) always start out empty. With a COPY
  # argument every field of the source entry is duplicated; otherwise each
  # argument is passed to the accessor of the same name.
  sub _init :Init{
    my ($self, $h) = @_;

    $self->__annotatedModRes([]);
    $self->__variants([]);

    if ($h->{COPY}) {
      my $src=$h->{COPY};

      # scalar fields: plain value copy
      foreach my $field (qw(AC ID ACorig dbName taxoID __sequence description ncbiTaxid PI mass)) {
        $self->$field($src->$field());
      }

      # list fields: duplicate the outer array and every [pos, ...] entry, so
      # that add_annotatedModRes/add_variant on one object never shows up in
      # the other
      foreach my $field (qw(__annotatedModRes __variants)) {
        my $list=$src->$field();
        my @copy=map { (ref $_) eq 'ARRAY' ? [@$_] : $_ } @{$list || []};
        $self->$field(\@copy);
      }
    } else {
      foreach my $key (keys %$h) {
        $self->$key($h->{$key});
      }
    }
    return $self;
  };

  # Get or set the sequence; all whitespace is removed from a value before it
  # is stored, and the cleaned value is returned.
  sub sequence{
    my ($self, $val)=@_;
    if (defined $val) {
      $val=~s/\s+//g;
      $self->__sequence($val);
      return
$val;
    }
    return $self->__sequence;
  }

  # Storage for seqType(). Object::InsideOut objects are not hash references,
  # so the value has to live in a field like every other attribute.
  my @seqType :Field(Accessor => '__seqType', permission=>'Private');

  # Get or set the sequence type.
  #   $val : optional, must be "AA" or "DNA" (croaks otherwise)
  # Returns the value just set, or the current value when called without one.
  sub seqType{
    my ($self, $val)=@_;
    if (defined $val) {
      croak "DBEntry seqtype must be of (AA|DNA)" unless $val=~/^(AA|DNA)$/;
      $self->__seqType($val);
      return $val;
    }
    return $self->__seqType;
  }

  # Get or set the annotated modified residues.
  # Setter: takes an array ref of [pos, modif] pairs (the outer array is
  #   copied), or a string such as "(1|ACET_nterm)(185|PHOS)"; whatever was
  #   set before is replaced. Returns nothing.
  # Getter: returns the list of [pos, modif] pairs in list context, the
  #   "(pos|modif)..." string in scalar context (undef when the list is empty).
  sub annotatedModRes{
    my $self=shift;

    if (@_) {
      my $arg=shift;
      if ((ref $arg) eq 'ARRAY') {
        $self->__annotatedModRes([@$arg]);
      } else {
        my @pairs;
        my $s=defined $arg ? $arg : '';
        # bind the match to $s explicitly; a bare "$s=/.../" would match $_
        while ($s=~/\((\d+)\|([^\)]+)\)/g) {
          push @pairs, [$1, $2];
        }
        $self->__annotatedModRes(\@pairs);
      }
      return;
    }

    return undef unless defined $self->__annotatedModRes;
    my @pairs=@{$self->__annotatedModRes};
    return @pairs if wantarray;
    return @pairs ? join('', map { "($_->[0]|$_->[1])" } @pairs) : undef;
  }

  # Append one annotated modification $m at position $p.
  sub add_annotatedModRes{
    my ($self, $p, $m)=@_;
    push @{$self->__annotatedModRes}, [$p, $m];
  }

  # Remove every annotated modification.
  sub clear_annotatedModRes{
    my $self=shift;
    $self->__annotatedModRes([]);
  }

  # Get or set the variants.
  # Setter: takes an array ref of [pos, seqa, seqb] triplets (the outer array
  #   is copied), or a string such as "(9|F|Y)(30|D|N)"; whatever was set
  #   before is replaced. Returns nothing.
  # Getter: returns the list of triplets in list context, the
  #   "(pos|seqa|seqb)..." string in scalar context (undef when the list is
  #   empty).
  sub variants{
    my $self=shift;

    if (@_) {
      my $arg=shift;
      if ((ref $arg) eq 'ARRAY') {
        $self->__variants([@$arg]);
      } else {
        my @triplets;
        my $s=defined $arg ? $arg : '';
        # bind the match to $s explicitly; a bare "$s=/.../" would match $_
        while ($s=~/\((\d+)\|(\w+)\|(\w+)\)/g) {
          push @triplets, [$1, $2, $3];
        }
        $self->__variants(\@triplets);
      }
      return;
    }

    return undef unless defined $self->__variants;
    my @triplets=@{$self->__variants};
    return @triplets if wantarray;
    return @triplets ? join('', map { "($_->[0]|$_->[1]|$_->[2])" } @triplets) : undef;
  }

  # Append one variant replacing $s1 by $s2 at position $p.
  sub add_variant{
    my ($self, $p, $s1, $s2)=@_;
    push @{$self->__variants}, [$p, $s1, $s2];
  }

  # Remove every variant.
  sub clear_variants{
    my $self=shift;
    $self->__variants([]);
  }

  # I/O

  # Fill the entry from fasta text: the first line is the header, everything
  # after the first newline is the sequence. The first whitespace-delimited
  # token of the header (after the leading '>') is the AC; the remainder is
  # split into "\KEY=value" annotations.
  sub readFasta{
    my $self=shift;
    my ($header, $seq)=split /\n/, shift, 2;
    $header=~s/^>//;
    $header=~s/^(\S+)\s*// or croak "fasta header [$header] does not start with an no empty string for AC";
    my $ac=$1;
    $self->AC($ac);
    foreach (split /\\(?=\w+=)/, $header) {
      my ($key, $val)=split /=/, $_, 2;
      $val=~s/\s+$// if $val;
      next unless $key;
      if ($key eq 'AC') {
        $self->AC($val);
      } elsif ($key eq 'ACOR') {
        $self->ACorig($val);
      } elsif ($key eq 'ID') {
        $self->ID($val);
      } elsif ($key eq 'DE') {
$self->description($val);
      } elsif ($key eq 'NCBITAXID') {
        $self->ncbiTaxid($val);
      } elsif ($key eq 'MODRES') {
        $self->annotatedModRes($val);
      } elsif ($key eq 'VARIANT') {
        $self->variants($val);
      } elsif ($key eq 'LENGTH') {
        # LENGTH is recognised but not stored
      } else {
        carp "DBEntry::readFasta no function handler for fasta head [$key]";
      }
    }
    # without a DE annotation, what is left of the header is the description
    $self->description($header) unless $self->description;
    $seq=~s/\s+//g;
    $self->sequence($seq);
  }

  # Print the entry in fasta format on the currently selected filehandle.
  # The header line always carries AC, \ID=, \MODRES= and \DE=; \VARIANT=,
  # \ACOR= and \NCBITAXID= are added only when they hold a true value. The
  # sequence follows, wrapped at 60 characters per line.
  sub printFasta{
    my $self=shift;

    my $id=$self->ID || '';
    my $modres=$self->annotatedModRes() || '';
    print ">".$self->AC()." \\ID=".$id." \\MODRES=".$modres;

    if ($self->variants) {
      print " \\VARIANT=".$self->variants;
    }
    if ($self->ACorig) {
      print " \\ACOR=".$self->ACorig;
    }
    if ($self->ncbiTaxid) {
      print " \\NCBITAXID=".$self->ncbiTaxid;
    }
    print " \\DE=".$self->description."\n";

    my $seq=$self->sequence();
    # break after every 60 characters, except at the very end of the sequence
    $seq=~s/(.{60})(?=.)/$1\n/g;
    #$seq=~s/(.{10})(?=.)/$1 /g;
    print $seq."\n";
  }

  sub printHtml{
    my $self=shift;
    print "
| AC | ".$self->AC." |
| ID | ".$self->ID." |
| original AC | ".$self->ACorig." |
| description | ".$self->description." |