# $Id: ncbi_taxon_names_parser.pm,v 1.2 2004/11/24 02:28:02 cmungall Exp $
#
#
# see also - http://www.geneontology.org
#          - http://www.godatabase.org/dev
#
# You may distribute this module under the same terms as perl itself

package GO::Parsers::ncbi_taxon_names_parser;

=head1 NAME

  GO::Parsers::ncbi_taxon_names_parser - parser for pipe-delimited taxon name files

=head1 SYNOPSIS

  do not use this class directly; use GO::Parser

=cut

=head1 DESCRIPTION

Reads a file in which every line holds pipe-delimited fields of the form

  id | value | (ignored) | tag |

(presumably the C<names.dmp> file of the NCBI taxonomy dump - confirm
against the data you feed in).

Consecutive lines sharing the same id are grouped into a single C<taxon>
event; all C<taxon> events are wrapped in one C<taxon_set> event.  Each
line contributes a C<[tag =E<gt> value]> node, where the tag has been
lowercased, had whitespace replaced by underscores, and had every
character other than C<a-z>, C<0-9> and C<_> removed.  A
C<scientific_name> consisting of at least two words additionally yields
C<genus> and C<species> nodes.

=cut

use Exporter;
use Text::Balanced qw(extract_quotelike extract_bracketed);
use base qw(GO::Parsers::base_parser);

use Carp;
use FileHandle;

use strict qw(subs vars refs);

# parse_fh($fh)
#
# Reads pipe-delimited rows from the filehandle $fh and fires events on
# $self: one 'taxon_set' start/end pair enclosing one 'taxon' event per
# run of consecutive rows that share the same id.
#
# Arguments:
#   $fh - readable filehandle positioned at the start of the data
#
# Returns: nothing (empty list / undef).
sub parse_fh {
    my ($self, $fh) = @_;

    my @stags = ();
    my $curr_id;

    # Emits the taxon collected so far and resets the buffer.  Does
    # nothing when no row has been read yet, so empty input never
    # produces a taxon event with an undefined id.
    my $flush_taxon = sub {
        return unless defined $curr_id;
        $self->event(taxon => [ [id => $curr_id], @stags ]);
        @stags = ();
        return;
    };

    $self->start_event('taxon_set');

    # A lexical line variable is used so the caller's $_ is left alone.
    while (defined(my $line = <$fh>)) {
        chomp $line;

        # Third column is not used by this parser.
        my ($id, $val, undef, $tag) = split(/\s*\|\s*/, $line);

        # Skip blank / whitespace-only lines; they carry no id and would
        # otherwise split the current taxon and add an empty node.
        next unless defined $id && $id =~ /\S/;

        # An id change closes the previous taxon.  'defined' rather than
        # truthiness, so that an id of 0 is still flushed correctly.
        if (defined $curr_id && $id != $curr_id) {
            $flush_taxon->();
        }
        $curr_id = $id;

        # Normalise the tag into an identifier-like event name.
        $tag = defined $tag ? lc($tag) : '';
        $tag =~ s/\s/_/g;
        $tag =~ tr/a-z0-9_//cd;    # was 'a-z0-0_', which dropped digits 1-9
        push(@stags, [$tag => $val]);

        # lump subspecies in with species;
        # eg genus=Homo, species=sapiens neanderthalensis
        # Only emitted when the name really has two or more words, so
        # single-word names do not yield undefined genus/species nodes.
        if (   $tag eq 'scientific_name'
            && defined $val
            && $val =~ /^(\S+)\s+(.*)/)
        {
            push(@stags, [genus => $1], [species => $2]);
        }
    }

    # Emit whatever is pending for the last id in the file.
    $flush_taxon->();

    $self->end_event('taxon_set');
    return;
}

1;