# $Id: refgenomes_parser.pm,v 1.1 2007/01/24 01:16:20 cmungall Exp $
#
#
# see also - http://www.geneontology.org
#          - http://www.godatabase.org/dev
#
# You may distribute this module under the same terms as perl itself

package GO::Parsers::refgenomes_parser;

=head1 NAME

  GO::Parsers::refgenomes_parser     - event-based parsing of tab-delimited reference genome homolog set tables

=head1 SYNOPSIS

  do not use this class directly; use GO::Parser

=cut

=head1 DESCRIPTION

Reads a tab-delimited table from a filehandle and reports its content
through the event methods inherited from GO::Parsers::base_parser.

=head1 INPUT FORMAT

=over

=item *

Lines starting with C<!> are skipped.

=item *

Empty or whitespace-only lines separate records and carry no data.

=item *

The first remaining line is the column header.

=item *

Every later line yields one C<homologset> event. Its id is C<MIM:>
followed by the value of the column whose normalised header is
C<OMIM_ID> (for example a header of C<OMIM ID>), with non-word
characters removed.

=item *

Columns up to and including the one headed C<completion target> are
reported as C<tagval> events. The tag type is the header with whitespace
turned into underscores and other non-word characters removed.

=item *

Columns to the right of C<completion target> are gene columns. A gene
column whose header is a single word yields a C<member> event with a
reference of the form C<PREFIX:value>, where PREFIX is the header
mapped through C<%DB_LOOKUP> (or the header itself if it has no
mapping). Gene columns with multi-word headers are ignored, as are
values that are empty, C<0>, or contain a space or a double quote.

=back

=head1 AUTHOR

=cut

use Exporter;
use base qw(GO::Parsers::base_parser);
use GO::Parsers::ParserEventNames;  # declare XML constants

use Carp;
use FileHandle;
use strict qw(subs vars refs);
use warnings;

# Name of the DTD describing the events emitted by this parser.
sub dtd {
    return 'refgenomes-parser-events.dtd';
}

# Class label for this parser; parse_fh does not use it.
sub _class {
    return 'generic';
}

# Deliberately returns nothing (empty list / undef); parse_fh does not use it.
sub _id_column {
    return;
}

# Identity mapping: returns its arguments with the invocant removed.
sub _map_property_type {
    my ($self, @types) = @_;
    return @types;
}

# Maps a lower-cased gene column header to the prefix used when building
# member references in parse_fh. Headers without an entry are used as-is.
our %DB_LOOKUP = (
    dictybase => 'DDB',
    flybase   => 'FB',
    wormbase  => 'WB',
    goa       => 'UniProt',
    chicken   => 'UniProt',
    zfin      => 'ZFIN',
    pombase   => 'GeneDB_Spombe',
);

# parse_fh($fh)
#
# Reads the tab-delimited table on $fh line by line and emits a
# 'refgenomeset' event containing one 'homologset' event per data row.
# See the INPUT FORMAT section of the POD for the row layout.
#
# Returns whatever the final pop_stack_to_depth(0) call returns.
sub parse_fh {
    my ($self, $fh) = @_;

    # Header text of the last descriptive column; every column to its
    # right is treated as a gene column.
    my $LAST_COL = 'completion target';
    my @hdr      = ();

    # Turns a column header into a tag name: whitespace becomes '_',
    # then every remaining non-word character is dropped. An undefined
    # header (row wider than the header line) is passed through as-is.
    my $normalize = sub {
        my ($name) = @_;
        return $name unless defined $name;
        $name =~ s/\s/_/g;
        $name =~ s/\W//g;
        return $name;
    };

    $self->start_event('refgenomeset');

  LINE:
    while (my $line = <$fh>) {
        chomp $line;
        $line =~ s/\r\z//;    # tolerate CRLF line endings
        next LINE if $line =~ /^\!/;

        $line =~ s/^\s+$//;
        if (!$line) {
            # Blank separator line.
            # NOTE(review): presumably closes every event nested below
            # the top-level 'refgenomeset' - confirm against base_parser.
            $self->pop_stack_to_depth(1);
            next LINE;
        }

        my @vals = split(/\t/, $line);
        if (!@hdr) {
            # First data-bearing line is the column header.
            @hdr = @vals;
            next LINE;
        }

        $self->start_event('homologset');

        # Index the row by normalised column name to find the OMIM id.
        my %valh = ();
        for my $i (0 .. $#hdr) {
            $valh{ $normalize->($hdr[$i]) } = $vals[$i];
        }
        my $id = defined $valh{OMIM_ID} ? $valh{OMIM_ID} : '';
        $id =~ s/\W//g;
        $self->event('@' => [[id => "MIM:$id"]]);

        my $in_genes = 0;
      COLUMN:
        for my $i (0 .. $#vals) {
            my $col = $hdr[$i];    # undef if the row is wider than the header
            my $val = $vals[$i];

            if ($in_genes) {
                my ($sp, @extra) = split(' ', defined $col ? $col : '');

                # Only single-word headers name a gene column.
                next COLUMN if @extra;

                # ignore anything with annoying chatty text
                next COLUMN if $val =~ / /;
                next COLUMN unless $val;
                next COLUMN if $val =~ /\"/;

                $sp = '' unless defined $sp;
                my $prefix = $DB_LOOKUP{ lc($sp) };
                $sp = $prefix if $prefix;
                my $fid = "$sp:$val";
                $self->event('member', [['@' => [[ref => $fid]]]]);
            }
            else {
                # The marker column itself is still reported as a tagval;
                # gene handling starts with the next column.
                $in_genes = 1 if defined $col && $col eq $LAST_COL;

                my $type = $normalize->($col);
                $self->event(tagval => [['@' => [[type => $type]]], ['.' => $val]])
                    if $val;
            }
        }

        $self->end_event('homologset');
    }

    # Unwind whatever is still open on the event stack.
    # NOTE(review): presumably this closes 'refgenomeset' - confirm
    # against base_parser.
    return $self->pop_stack_to_depth(0);
}

1;