#!/usr/local/bin/perl -w BEGIN { if (defined($ENV{GO_ROOT})) { use lib "$ENV{GO_ROOT}/perl-api"; } } use strict; use GO::AppHandle; if (!@ARGV) { print usage(); exit; } use Getopt::Long; my $apph = GO::AppHandle->connect(\@ARGV); my $h = {}; GetOptions($h, "help=s", "all", "acc|a=s@", "fullheader", "skipnogo", "withname", "filter=s%", "speciesdb=s@", "evcode|e=s@", ); if ($h->{help}) { print usage(); exit; } if ($h->{evcode}) { $apph->filters->{evcodes} = $h->{evcode}; } if ($h->{speciesdb}) { $apph->filters->{speciesdbs} = $h->{speciesdb}; } my $prods; if ($h->{all}) { $prods = $apph->get_all_product_with_seq_ids; printf STDERR "GOT %d prods\n", scalar(@$prods); } else { my $accs = $h->{acc}; if (!$accs) { $accs = [@ARGV]; } $prods = []; foreach my $acc (@$accs) { my $term = $apph->get_term({acc=>$acc}); printf STDERR "%s\n", $term->as_str; my $curr_prods = $term->deep_product_list; push(@$prods, @$curr_prods); } } my $time = localtime(time); for (my $i=0; $i<@$prods; $i++) { my $prod = $prods->[$i]; if (!ref($prod)) { # is an id $prod = $apph->get_product({id=>$prod}); } if ($i && !int($prods->[$i-1])) { # clear memory %{$prods->[$i-1]} = (); } my $seqs = $prod->seq_list; # longest by default my $longest; foreach my $seq (@$seqs) { if (!defined($longest) || $seq->length > $longest->length) { $longest = $seq; } } next unless $longest; my $seq = $longest; my ($sptr) = grep {lc($_->xref_dbname) eq "sptr" } @{$seq->xref_list}; my $hdr = sprintf("%s|%s %s symbol:%s%s %s ", uc($prod->xref->xref_dbname), $prod->xref->xref_key, $sptr ? $sptr->as_str : "-", $prod->symbol, $prod->full_name ? ' "'.$prod->full_name.'"' : "", $prod->species ? sprintf("species:%s \"%s\"", $prod->species->ncbi_taxa_id, $prod->species->binomial) : '-', ); if ($h->{fullheader}) { # TODO: faster way of doing this my $terms = $apph->get_terms({product=>$prod}); next if $h->{skipnogo} && !@$terms; my @h_elts = (); foreach my $term (@$terms) { my $al = $term->selected_association_list; my %codes = (); map { $codes{$_->code} = 1 } map { @{$_->evidence_list} } @$al; my $t_extra = ""; if ($h->{withname}) { $t_extra = sprintf(' "%s"', $term->name); } push(@h_elts, sprintf("[%s%s evidence=%s]", $term->public_acc, $t_extra, join(";", keys %codes), ) ); } $hdr .= join(" ", @h_elts); } $hdr.= " ". join(" ", map {$_->as_str} @{$seq->xref_list}); # $hdr .= sprintf(" --- Exported on $time"); $seq->description($hdr); print $seq->to_fasta; } sub usage { print "get-seqs.pl [-d dbname] [-h dbhost] [-fullheader] [-dbms dbms] [-evcode code] [-all] GO_ID\n"; print < myfasta.fa EOM }