# -*-Perl-*- Test Harness script for Bioperl
# $Id$
#
# Network tests for Bio::DB::GenBank.  Every group of assertions that needs
# a live connection sits in a SKIP block: the remote call is wrapped in
# eval, and if it died the remaining assertions of that block are skipped.
# The number passed to each skip() must equal the number of assertions left
# in the enclosing block, so that the fixed plan of 44 tests still adds up
# when the network is unavailable.

use strict;
use warnings;

BEGIN {
    use lib '.';
    use Bio::Root::Test;

    test_begin(-tests               => 44,
               -requires_modules    => [qw(IO::String
                                           LWP::UserAgent
                                           HTTP::Request::Common)],
               -requires_networking => 1);

    use_ok('Bio::DB::GenBank');
}

# Expected sequence length, keyed by the display_id of the returned record.
my %expected_lengths = (
    'MUSIGHBA1'  => 408,
    'AF303112'   => 1611,
    'AF303112.1' => 1611,
    'AF041456'   => 1156,
    'CELRABGDI'  => 1743,
    'CH402638'   => 5041,
);

my ($gb, $seq, $seqio, $seqin);

#
# Bio::DB::GenBank
#
ok $gb = Bio::DB::GenBank->new('-delay' => 0), 'Bio::DB::GenBank';

# get a single seq
SKIP: {
    eval { $seq = $gb->get_Seq_by_id('MUSIGHBA1'); };
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Do you have network access? Skipping GenBank tests", 4 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;

    eval { $seq = $gb->get_Seq_by_acc('AF303112'); };
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 3 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;

    eval { $seq = $gb->get_Seq_by_version('AF303112.1'); };
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 2 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;

    eval { $seq = $gb->get_Seq_by_gi('405830'); };
    skip "Couldn't connect to Genbank with Bio::DB::GenBank.pm. Transient network problems? Skipping GenBank tests", 1 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
}

$seq = $seqio = undef;

# batch mode
SKIP: {
    eval { $seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]); };
    skip "Batch access test failed for Genbank. Skipping those tests", 4 if $@;

    my $done = 0;
    while (my $s = $seqio->next_seq) {
        is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
        $done++;
    }
    skip('No seqs returned', 4) if !$done;
    is $done, 3, 'batch mode returned three sequences';
}

$seq = $seqio = undef;

# test the temporary file creation and fasta
ok $gb = Bio::DB::GenBank->new('-format'        => 'fasta',
                               '-retrievaltype' => 'tempfile',
                               '-delay'         => 0),
    'Bio::DB::GenBank with fasta format and tempfile retrieval';

SKIP: {
    eval { $seq = $gb->get_Seq_by_id('MUSIGHBA1'); };
    skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
    # last part of id holds the key
    is $seq->length,
        $expected_lengths{ (split(/\|/, $seq->display_id))[-1] },
        $seq->display_id;

    eval { $seq = $gb->get_Seq_by_acc('AF303112'); };
    skip "Couldn't connect to complete GenBank tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
    # last part of id holds the key
    is $seq->length,
        $expected_lengths{ (split(/\|/, $seq->display_id))[-1] },
        $seq->display_id;

    # batch mode requires genbank format
    $gb->request_format("gb");
    eval { $seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]); };
    skip "Couldn't connect to complete GenBank batch tests with a tempfile with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;

    my $done = 0;
    while (my $s = $seqio->next_seq) {
        is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
        # test the case where the db is gone,
        # but a temp file should remain until seqio goes away.
        undef $gb;
        $done++;
    }
    skip('No seqs returned', 4) if !$done;
    is $done, 3, 'tempfile batch mode returned three sequences';
}

$seq = $seqio = undef;

# test pipeline creation
ok $gb = Bio::DB::GenBank->new('-retrievaltype' => 'pipeline',
                               '-delay'         => 0),
    'Bio::DB::GenBank with pipeline retrieval';

SKIP: {
    eval { $seq = $gb->get_Seq_by_id('MUSIGHBA1'); };
    skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 6 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;

    eval { $seq = $gb->get_Seq_by_acc('AF303112'); };
    skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 5 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;

    eval { $seqio = $gb->get_Stream_by_id([qw(J00522 AF303112 2981014)]); };
    skip "Couldn't connect to complete GenBank tests with a pipeline with Bio::DB::GenBank.pm. Skipping those tests", 4 if $@;

    my $done = 0;
    while (my $s = $seqio->next_seq) {
        is $s->length, $expected_lengths{$s->display_id}, $s->display_id;
        # test the case where the db is gone,
        # but the pipeline should remain until seqio goes away
        undef $gb;
        $done++;
    }
    skip('No seqs returned', 4) if !$done;
    is $done, 3, 'pipeline batch mode returned three sequences';
}

$seq = $seqio = undef;

# test contig retrieval
ok $gb = Bio::DB::GenBank->new('-delay'  => 0,
                               '-format' => 'gbwithparts'),
    'Bio::DB::GenBank with gbwithparts format';

SKIP: {
    eval { $seq = $gb->get_Seq_by_id('CH402638'); };
    skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 3 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;

    # now to check that postprocess_data in NCBIHelper catches CONTIG...
    ok $gb = Bio::DB::GenBank->new('-delay'  => 0,
                                   '-format' => 'gb'),
        'Bio::DB::GenBank with gb format';
    eval { $seq = $gb->get_Seq_by_id('CH402638'); };
    skip "Couldn't connect to GenBank with Bio::DB::GenBank.pm. Skipping those tests", 1 if $@;
    is $seq->length, $expected_lengths{$seq->display_id}, $seq->display_id;
}

$seq = $seqio = undef;

# bug 1405
my @result;
ok $gb = Bio::DB::GenBank->new(-format    => 'Fasta',
                               -seq_start => 2,
                               -seq_stop  => 7),
    'Bio::DB::GenBank with -seq_start/-seq_stop';

SKIP: {
    eval { $seq = $gb->get_Seq_by_acc("A11111"); };
    skip "Couldn't connect to complete GenBank tests. Skipping those tests", 15 if $@;
    # -seq_start 2 .. -seq_stop 7 is expected to yield six residues
    is $seq->length, 6, 'subsequence length';

    # complexity tests
    ok $gb = Bio::DB::GenBank->new(-format     => 'Fasta',
                                   -complexity => 0),
        'Bio::DB::GenBank with -complexity => 0';
    eval { $seqin = $gb->get_Stream_by_acc("5"); };
    skip "Couldn't connect to complete GenBank tests. Skipping those tests", 13 if $@;

    # Expected (length, alphabet) pairs, consumed in the order the stream
    # returns the sequences.
    @result = (1136, 'dna', 342, 'protein');
    while ($seq = $seqin->next_seq) {
        is $seq->length,   shift(@result);
        is $seq->alphabet, shift(@result);
    }
    is scalar(@result), 0, 'all expected length/alphabet pairs were consumed';

    # Real batch retrieval using epost/efetch
    # these tests may change if integrated further into Bio::DB::Gen*
    # Currently only useful for retrieving GI's via get_seq_stream
    $gb = Bio::DB::GenBank->new();
    eval {
        $seqin = $gb->get_seq_stream(-uids => [4887706, 431229, 147460],
                                     -mode => 'batch');
    };
    skip "Couldn't connect to complete GenBank batchmode epost/efetch tests. Skipping those tests", 8 if $@;

    # Expected length per accession; entries are deleted as they are seen so
    # that an empty hash at the end means every accession came back.
    my %result = (
        'M59757' => 12611,
        'X76083' => 3140,
        'J01670' => 1593,
    );
    my $ct = 0;
    while ($seq = $seqin->next_seq) {
        $ct++;
        my $acc = $seq->accession;
        ok exists $result{$acc};
        is $seq->length, $result{$acc};
        delete $result{$acc};
    }
    skip('No seqs returned', 8) if !$ct;
    is $ct, 3, 'epost/efetch batch returned three sequences';
    # Count the keys explicitly: a hash in scalar context is not a plain
    # count on perls before 5.26.
    is scalar(keys %result), 0, 'every expected accession was returned';
}