# $Id: locuslink_parser.pm,v 1.1 2004/01/27 23:52:24 cmungall Exp $ # # Adapterd from BioPerl module for Bio::SeqIO::locuslink # # POD documentation - main docs before the code =head1 NAME GO::Parsers::locuslink_parser - =head1 SYNOPSIS =head1 DESCRIPTION =cut package GO::Parsers::locuslink_parser; use strict; use vars qw(@ISA); use base qw(GO::Parsers::base_parser); sub _initialize { my($self,@args) = @_; $self->SUPER::_initialize(@args); } sub transitions { return qw( NM transcript NG 0 CONTIG 0 EVID evidence ACCNUM accession OFFICIAL_SYMBOL 0 BUTTON url DB_DESCR dbxref /DB_LINK dbxref ); } sub compounds { return ( STS => [qw(sts_acc chr_num unk symbol type src)], GO => [qw(aspect term evcode go_acc src unk)], EXTANNOT => [qw(aspect term evcode src unk)], CDD => [qw(domain domain_acc num unk score)], NG => [qw(acc u1 u2 u3 u4)], CONTIG => [qw(contig_acc u1 u2 u3 u4 strand chr_num src)], XM => [qw(acc gi)], XP => [qw(acc gi)], XG => [qw(acc gi)], ACCNUM => [qw(acc gi)], PROT => [qw(acc gi)], MAP => [qw(map_loc link code)], SUMFUNC => [qw(descr src)], GRIF => [qw(grif_pmid descr)], COMP => [qw(comp_acc symbol2 chr_num2 map_pos2 locusacc2 chr_num1 symbol1 src)], ); } sub record_tag {'locusset'} sub parse_fh { my $self = shift; my $fh = shift; $self->start_event('locusset'); my (%record,@results,$search,$ref,$cddref); my ($PRESENT,@keep); # LOCUSLINK entries begin w/ >> local $/=">>"; # slurp in a whole entry and return if no more entries return unless my $entry = <$fh>; # if its the first entry you have to slurp it in again if ($entry eq '>>'){ #first entry return unless $entry = <$fh>; } if (!($entry=~/LOCUSID/)){ $self->throw("No LOCUSID in first line of record. ". "Not LocusLink in my book."); } my %transitions = $self->transitions; my %compounds = $self->compounds; # my %grouped = (); # foreach (keys %transitions) { # if (/\;/) { # my $t = $transitions{$_}; # my (@keylist) = split(/\;/, $_); # foreach (@keylist) { # $transitions{$_} = $t; # $grouped{$_} = $t; # } # } # } $self->start_event('locus'); my $level = 0; my @lines = split(/\n/, $entry); foreach (@lines) { if (/(\w+):\s*(.*)/) { my ($k, $v) = (uc($1), $2); my $transition = $transitions{$k}; if (defined $transition) { if (!$transition) { if ($level) { #$self->throw("uh oh $_") unless $level; $self->end_event($level); } $level = 0; } elsif ($transition eq $level) { $self->end_event($level); $self->start_event($level); } else { if ($level) { $self->end_event($level); $level = 0; } $self->start_event($transition); $level = $transition; } } # for grouped keys, every key must be part of # group to remain part of the same super-element # if ($level && # $grouped{$level}) { # if (!$grouped{$k} || # $grouped{$k} ne $grouped{$level}) { # $self->end_event($level); # $level = 0; # } # } if ($compounds{$k}) { my (@vals) = split(/\|/, $v); my @pairs = ([defline=>$v]); foreach (@{$compounds{$k}}) { my $v = shift @vals; push(@pairs, [$_ => $v]) unless $v eq 'na'; } $self->event(lc($k) => [@pairs]); } else { $self->event(lc($k), $v); } my $end = $transitions{'/'.$k}; if ($end) { $self->end_event($end); $level = 0; } } } if ($level) { $self->end_event($level); } $self->end_event('locus'); $self->end_event('locusset'); return; } 1;