package Stone::GB_Sequence; use strict; use Carp; use vars '@ISA'; =head1 NAME Stone::GB_Sequence - Specialized Access to GenBank Records =head1 SYNOPSIS use Boulder::Genbank; # No need to use Stone::GB_Sequence directly $gb = Boulder::Genbank->newFh qw(M57939 M28274 L36028); while ($entry = <$gb>) { print "Entry's length is ",$entry->length,"\n"; @cds = $entry->match_features(-type=>'CDS'); @exons = $entry->match_features(-type=>'Exon',-start=>100,-end=>300); } } =head1 DESCRIPTION Stone::GB_Sequence provides several specialized access methods to the various fields in a GenBank flat file record. You can return the sequence as a Bio::Seq object, or query the sequence for features that match positional or descriptional criteria that you provide. =head1 CONSTRUCTORS This class is not intended to be created directly, but via a L stream. =head1 METHODS In addition to the standard L methods and accessors, the following methods are provided. In the synopses, the variable C<$entry> refers to a previously-created Stone::GB_Sequence object. =head2 $length = $entry->length Get the length of the sequence. =head2 $start = $entry->start Get the start position of the sequence, currently always "1". =head2 $end = $entry->end Get the end position of the sequence, currently always the same as the length. =head2 @feature_list = $entry->features(-pos=>[50,450],-type=>['CDS','Exon']) features() will search the entry feature list for those features that meet certain criteria. The criteria are specified using the B<-pos> and/or B<-type> argument names, as shown below. =over 4 =item -pos Provide a position or range of positions which the feature must B. A single position is specified in this way: -pos => 1500; # feature must overlap postion 1500 or a range of positions in this way: -pos => [1000,1500]; # 1000 to 1500 inclusive If no criteria are provided, then features() returns all the features, and is equivalent to calling the Features() accessor. =item -type, -types Filter the list of features by type or a set of types. Matches are case-insensitive, so "exon", "Exon" and "EXON" are all equivalent. You may call with a single type as in: -type => 'Exon' or with a list of types, as in -types => ['Exon','CDS'] The names "-type" and "-types" can be used interchangeably. =head2 $seqObj = $entry->bioSeq; Returns a L object from the Bioperl project. Dies with an error message unless the Bio::Seq module is installed. =back =head1 AUTHOR Lincoln D. Stein . =head1 COPYRIGHT Copyright 1997-1999, Cold Spring Harbor Laboratory, Cold Spring Harbor NY. This module can be used and distributed on the same terms as Perl itself. =head1 SEE ALSO L, L, L =cut @ISA = 'Stone'; # -------------------- h'mmmmm, bogus alert! -------------------- # Return list of all features that overlap a particular range # # Example: # @features = $gb->grab_features(-pos=>[50,450],-types=>['CDS','Exon']) # sub features { my $self = shift; my %param = @_; my %p; @p{map {s/^-//; s/s$//; lc $_} keys %param} = values %param; my $f = $self->Features; # regularize coordinates my $pos = $p{po}; my ($left,$right) = ref($pos) ? @$pos : $pos; $left ||= 0; $right ||= $left; ($left,$right) = ($right,$left) if $left > $right; # regularize types my @types = ref $p{type} ? @{$p{type}} : $p{type} if $p{type}; @types = $f->tags unless @types; # flatten into a list of all features of the specified type(s) my @features; for my $t (@types) { my @f = $f->get("\u\L$t"); foreach (@f) { $_->insert(Type=>$t) unless $_->get('Type'); } push @features,@f; } @features = grep { _overlap_filter($_,$left,$right) } @features if $left > 0 || $right > 0; return @features; } sub length { my $self = shift; return length $self->Sequence; } sub start { 1; } sub end { $_[0]->length } sub bioSeq { my $self = shift; my $id = $self->Accession; my $seq = $self->Sequence; my $desc = $self->Definition; eval { require Bio::Seq } || croak "Bio::Seq module not installed"; return Bio::Seq->new(-id=>$id,-sequence=>$seq,-desc=>$desc); } sub _feature_filter { my $f = shift; my $types = shift; foreach (@$types) { return 1 if $f->get("\u\L$_"); } return; } sub _overlap_filter { # assumes left and right are numerically sorted my $f = shift; my ($left,$right) = @_; return unless my $p = $f->Position; # simplest case -- single position if ($p =~ /^(\d+)$/) { return 1 if $1 >= $left and $1 <= $right; } # another simple case -- either/or if ($p =~ /^(\d+)[.^](\d+)$/) { return 1 if $1 == $left || $2 == $left || $1 == $right || $2 == $right; } # next simplest case -- a range if ($p =~ /^?(\d+)$/) { ($2,$1) = ($1,$2) if $1 > $2; return 1 if ($left >= $1 and $left <= $2) or ($right >= $1 and $left <= $2); } # complex case, a join(), order() or group() # not sure this is handled correctly in all the crazy combos if ($p =~ /^(?:join|order|group|complement)/) { $p =~ s/\((\d)+\.\d+\)\.\./$1../g; # (1.10).. => 1.. $p =~ s/\.\.\(\d+\.(\d+)\)/..$1/g; # ..(1.10) => ..10 my @ranges = $p =~ /[(),](?\d+)/g; foreach (@ranges) { next unless /?(\d+)/; ($2,$1) = ($1,$2) if $1 > $2; return 1 if ($left >= $1 and $left <= $2) or ($right >= $1 and $left <= $2); } } return; } 1; __END__