package GO::Metadata::Panther;
use strict;
use warnings;
use Exporter;
use base qw/Exporter/;
use Memoize;
use List::Util qw/sum first/;
use Data::Dumper;

our @EXPORT_OK = qw/@species/;

=head1 NAME

GO::Metadata::Panther - Species info for data used by Panther Clusters

=head1 SYNOPSIS

 use GO::Metadata::Panther qw/@species/;

 for my $species (@species) {
     # do something
 }

Or

 use GO::Metadata::Panther;
 my $s = GO::Metadata::Panther->code('YEAST');

=head1 DESCRIPTION

Accesses information related to species in the Panther sequence file.
(The file name and the download link that used to follow this sentence
are missing from this copy of the documentation and need to be
restored by someone who knows the current location.)

Each item in the exportable C<@species> array is a hash reference
describing one species.  The items in that hash are:

=over

=item code

A scalar holding the UniProt species code.

=item ncbi_taxa_id

An array reference of NCBI taxa ids that items in the GO database
match.  This should only be one id, but sometimes it's useful to scan
multiple.

=item prefer

Optional.  An array reference of database names.  This module only
stores the list; how it is interpreted is up to the caller.

=back

For a complete list of every UniProt species matched to a NCBI taxa
id see L<https://www.uniprot.org/docs/speclist>.

=cut

# These need to be in the order you wish to view them on the AmiGO
# dist png.
#
# For reference genomes, the first number in the ncbi_taxa_id list
# needs to be the one referenced in AmiGO::Aid::ReferenceGenome.
our @species = (

    #
    # A
    #

    {    # Anopheles gambiae
        code         => 'ANOGA',
        ncbi_taxa_id => [ 7165 ],
        prefer       => [ 'Gene' ],
    },
    {    # Arabidopsis thaliana
        code         => 'ARATH',
        ncbi_taxa_id => [ 3702 ],
    },
    {    # Aquifex aeolicus
        code         => 'AQUAE',
        ncbi_taxa_id => [ 63363 ],
    },
    {    # Ashbya gossypii ATCC 10895
        code         => 'ASHGO',
        ncbi_taxa_id => [ 33169 ],
    },

    #
    # B
    #

    {    # Bacillus subtilis
        code         => 'BACSU',
        ncbi_taxa_id => [ 1423 ],
    },
    {    # Bacteroides thetaiotaomicron
        code         => 'BACTN',
        ncbi_taxa_id => [ 818 ],
    },
    {    # Bos taurus
        code         => 'BOVIN',
        ncbi_taxa_id => [ 9913 ],
    },
    {    # Bradyrhizobium japonicum
        code         => 'BRAJA',
        # matches the two UniProtKB items in GO
        ncbi_taxa_id => [ 375 ],
    },

    #
    # C
    #

    {    # Caenorhabditis briggsae
        code         => 'CAEBR',
        ncbi_taxa_id => [ 6238 ],
    },
    {    # Caenorhabditis elegans
        code         => 'CAEEL',
        ncbi_taxa_id => [ 6239 ],
        prefer       => [ 'WB' ],
    },
    {    # Canis lupus familiaris
        code         => 'CANFA',
        ncbi_taxa_id => [ 9615 ],
        prefer       => [ 'UniProtKB' ],
    },
    {    # Chlamydia trachomatis
        code         => 'CHLTA',
        ncbi_taxa_id => [ 315277 ],
    },
    {    # Chlamydomonas reinhardtii
        code         => 'CHLRE',
        ncbi_taxa_id => [ 3055 ],    # found 11 of them
    },
    {    # NOTE(review): no species name was recorded for this entry;
         # presumably Chloroflexus aurantiacus -- confirm against UniProt.
        code         => 'CHLAA',
        ncbi_taxa_id => [ 324602 ],
    },
    {    # Ciona intestinalis
        code         => 'CIOIN',
        ncbi_taxa_id => [ 7719 ],
    },

    #
    # D
    #

    {    # Danio rerio
        code         => 'DANRE',
        ncbi_taxa_id => [ 7955 ],
        prefer       => [ 'ZFIN' ],
    },
    {    # Deinococcus radiodurans
        code         => 'DEIRA',
        ncbi_taxa_id => [ 1299 ],
    },
    {    # Dictyostelium discoideum
        code         => 'DICDI',
        ncbi_taxa_id => [ 44689 ],
    },
    {    # Drosophila melanogaster
        code         => 'DROME',
        ncbi_taxa_id => [ 7227 ],
        prefer       => [ 'FB' ],
    },

    #
    # E
    #

    {    # Emericella nidulans
        code         => 'EMENI',
        ncbi_taxa_id => [ 162425 ],
    },
    {    # Entamoeba histolytica
        code         => 'ENTHI',
        ncbi_taxa_id => [ 5759 ],
    },
    {    # Escherichia coli
        code         => 'ECOLI',
        ncbi_taxa_id => [ 83333, 511145 ],
        prefer       => [ 'EcoCyc' ],
    },

    #
    # G
    #

    {    # Gallus gallus
        code         => 'CHICK',
        ncbi_taxa_id => [ 9031 ],
        prefer       => [ 'UniProtKB' ],
    },
    {    # Geobacter sulfurreducens
        code         => 'GEOSL',
        ncbi_taxa_id => [ 35554 ],
    },
    {    # Gloeobacter violaceus
        code         => 'GLOVI',
        ncbi_taxa_id => [ 33072, 251221 ],
        # 251221 is only here to match GLOVI|ENTREZ:2601616|UniProtKB
        prefer       => [ 'UniProtKB' ],
    },

    #
    # H
    #

    {    # Homo sapiens
        code         => 'HUMAN',
        ncbi_taxa_id => [ 9606 ],
        prefer       => [ 'ENSEMBL', 'UniProtKB' ],
    },

    #
    # L
    #

    {    # Leishmania major
        code         => 'LEIMA',
        ncbi_taxa_id => [ 5664, 347515 ],
    },
    {    # Leptospira interrogans
        code         => 'LEPIN',
        # only gets one
        ncbi_taxa_id => [ 173 ],
    },

    #
    # M
    #

    {    # Macaca mulatta
        code         => 'MACMU',
        ncbi_taxa_id => [ 9544 ],
    },
    {    # Methanosarcina acetivorans
        code         => 'METAC',
        ncbi_taxa_id => [ 2214 ],
    },
    {    # Monodelphis domestica
        code         => 'MONDO',
        ncbi_taxa_id => [ 13616 ],
    },
    {    # Mus musculus
        code         => 'MOUSE',
        ncbi_taxa_id => [ 10090 ],
        prefer       => [ 'MGI' ],
    },

    #
    # N
    #

    {    # Neurospora crassa
        code         => 'NEUCR',
        ncbi_taxa_id => [ 5141 ],
    },

    #
    # O
    #

    {    # Ornithorhynchus anatinus
        code         => 'ORNAN',
        ncbi_taxa_id => [ 9258 ],
    },
    {    # Oryza sativa
        code         => 'ORYSJ',
        ncbi_taxa_id => [ 39947 ],
    },

    #
    # P
    #

    {    # Pan troglodytes
        code         => 'PANTR',
        ncbi_taxa_id => [ 9598 ],
    },
    {    # Plasmodium yoelii
        code         => 'PLAYO',
        ncbi_taxa_id => [ 73239 ],
    },
    {    # Pseudomonas aeruginosa
        code         => 'PSEA7',
        ncbi_taxa_id => [ 381754 ],
    },

    #
    # R
    #

    {    # Rattus norvegicus
        code         => 'RAT',
        ncbi_taxa_id => [ 10116 ],
        prefer       => [ 'RGD', 'UniProtKB' ],
    },

    #
    # S
    #

    {    # Saccharomyces cerevisiae
        code         => 'YEAST',
        ncbi_taxa_id => [ 4932 ],
    },
    {    # Schizosaccharomyces pombe
        code         => 'SCHPO',
        ncbi_taxa_id => [ 4896 ],
    },
    {    # Streptomyces coelicolor
        code         => 'STRCO',
        ncbi_taxa_id => [ 1902 ],
    },
    {    # Strongylocentrotus purpuratus
        code         => 'STRPU',
        ncbi_taxa_id => [ 7668 ],
    },
    {    # Sulfolobus solfataricus
        code         => 'SULSO',
        ncbi_taxa_id => [ 2287 ],
    },

    #
    # T
    #

    {    # Takifugu rubripes
        code         => 'FUGRU',    # UniProt calls this: TAKRU
        ncbi_taxa_id => [ 31033 ],
    },
    {    # Tetrahymena thermophila
        code         => 'TETTH',
        ncbi_taxa_id => [ 5911, 312017 ],
    },
    {    # Thermotoga maritima
        code         => 'THEMA',
        ncbi_taxa_id => [ 2336 ],
    },

    #
    # X
    #

    {    # Xenopus (Silurana) tropicalis
        code         => 'XENTR',
        ncbi_taxa_id => [ 8364 ],
    },
);

=head2 Constructors

The constructors scan C<@species> for the requested data and return
the object that matches the data.  Otherwise they return a false
value (C<undef>).

Note that the returned object is the very hash reference stored in
C<@species>, blessed into the calling class; it is not a copy.

=over

=item my $s = GO::Metadata::Panther->code(I<code>)

Return an object filled with the species reference from the UniProtKB
species code.  Results are cached with L<Memoize>.

=cut

memoize('code');
sub code {
    my $class = shift;
    my $code  = shift;

    for my $species (@species) {
        if ($species->{code} eq $code) {
            return bless $species, $class;
        }
    }

    # Deliberately an explicit undef: existing callers may rely on
    # getting exactly one value back even in list context.
    return undef;
}

=item my $s = GO::Metadata::Panther->ncbi(I<ncbi_taxa_id>)

Create an object from the I<ncbi_taxa_id>.  Every id listed for a
species is checked, not only the first one.

=cut

sub ncbi {
    my $class = shift;
    my $ncbi  = shift;

    # Nothing can match an undefined id; bail out before the numeric
    # comparisons below emit one warning per taxa id.
    return undef unless defined $ncbi;

    for my $species (@species) {
        if (first { $ncbi == $_ } @{ $species->{ncbi_taxa_id} }) {
            return bless $species, $class;
        }
    }

    return undef;
}

=back

=head2 Function

Functions that can be used outside of the OO interface.

=over

=item GO::Metadata::Panther::codes()

Returns a list of all UniProt species codes in C<@species>, in the
order they appear there.

=cut

sub codes {
    return map { $_->{code} } @species;
}

=item GO::Metadata::Panther::valid_codes(I<@codes>)

Send it a list of Panther UniProt species codes, returns true if they
are all present in C<@species>.  Otherwise returns false.  An empty
list is considered valid.

=cut

sub valid_codes {
    my @codes = @_;

    # Stop at the first unknown code.  This replaces a comparison
    # against List::Util::sum(), which returns undef for an empty list
    # and therefore warned when no codes were passed.
    for my $code (@codes) {
        return '' unless __PACKAGE__->code($code);
    }

    return 1;
}

=back

=head2 OO Function

=over

=item $s->ncbi_ids()

Returns the list of NCBI taxa identifiers associated with the UniProt
species code.  In a perfect world this will only ever return one
value.  In any case, the first value will be the actual numeric
identifier associated.

=cut

sub ncbi_ids {
    my $s = shift;
    return @{ $s->{ncbi_taxa_id} };
}

=back

=head2 AUTHOR

Sven Heinicke E<lt>sven@genomics.princeton.eduE<gt>

=cut

1;