#!/usr/bin/perl

# $Id: batchGOView.pl,v 1.7 2009/10/29 18:31:01 sherlock Exp $

# Date   : 4th December 2003
# Author : Gavin Sherlock

# License information (the MIT license)

# Copyright (c) 2003 Gavin Sherlock; Stanford University

# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:

# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

use strict;
use diagnostics;
use warnings;

use CGI qw/:all :html3/;
use IO::File;

=head1 NAME

batchGOView.pl - batch processor for creating visual output from GO::TermFinder

=head1 SYNOPSIS

batchGOView.pl will read through a number of files, each containing a
list of genes, and will create for each one an html page with a
GO::View, such that you can graphically browse the results.  You need
to provide a .conf file, and then a list of files for processing, each
of which contain a list of genes.  An example .conf file exists in
this directory - edit as appropriate.

Usage:

    batchGOView.pl <.conf_file> <file1> <file2> ... <fileN>

The following usage should give you some output, using the example
files:

    batchGOView.pl GoView.conf genes.txt genes2.txt

An html file, batchGOView.html will be created, that will allow you to
browse the results from all of the input lists of genes in a simple
format.  A frame on the left will have a list of the files that were
input, and the frame on the right will display the results for the
clicked on link.

=cut

use GO::TermFinder;
use GO::AnnotationProvider::AnnotationParser;
use GO::OntologyProvider::OboParser;
use GO::View;
use GO::TermFinderReport::Html;
use GO::Utils::File    qw (GenesFromFile);
use GO::Utils::General qw (CategorizeGenes);

$| = 1;

##################################################################################
sub Usage{
##################################################################################
# Prints a short usage message to STDOUT and terminates the program.

    print <<USAGE;

This program takes a .conf file and a list of files, each of which
contains a list of genes.  For each file of genes it creates an html
page with a GO::View image and a table of the GO terms found; the
pages can be browsed through batchGOView.html.

Usage:

batchGOView.pl <.conf_file> <file1> <file2> ... <fileN>

e.g.

batchGOView.pl GoView.conf genes.txt genes2.txt

USAGE

    exit;

}

# we need at least 2 arguments, a .conf file, and a file of input
# genes to test

Usage() if (@ARGV < 2);

my $confFile = shift;

my $conf = ReadConfFile($confFile);

# now set up the objects we need

my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => $conf->{'ontologyFile'},
                                                    aspect       => $conf->{'aspect'});

my $annotation = GO::AnnotationProvider::AnnotationParser->new(annotationFile => $conf->{'annotationFile'});

my @additionalArgs;
my @population;

if (exists $conf->{'population'} && defined $conf->{'population'}){

    # they have defined an input file containing genes from which the
    # listed genes were sampled

    @population = GenesFromFile($conf->{'population'});

    push(@additionalArgs, ('population', \@population));

    $conf->{'totalNumGenes'} = scalar(@population);

}else{

    # fall back on the number of annotated genes when the conf file
    # gave no (or an empty/zero) totalNumGenes

    $conf->{'totalNumGenes'} ||= $annotation->numAnnotatedGenes;

    push(@additionalArgs, ('totalNumGenes', $conf->{'totalNumGenes'}));

}

my $termFinder = GO::TermFinder->new(annotationProvider => $annotation,
                                     ontologyProvider   => $ontology,
                                     aspect             => $conf->{'aspect'},
                                     @additionalArgs);

my $report = GO::TermFinderReport::Html->new();

GenerateFrameset();

# now open an html file that will have a list of links for all the results

my $listFile = $conf->{'outDir'}.'batchGOViewList.html';

my $listFh = IO::File->new($listFile, q{>})
    or die "Cannot make $listFile : $!";

# go through each file

foreach my $file (@ARGV){

    print "Analyzing $file\n";

    # get the genes in the file

    my @genes = GenesFromFile($file);

    # now find terms

    my @pvalues = $termFinder->findTerms(genes        => \@genes,
                                         calculateFDR => $conf->{'calculateFDR'});

    # now we hand these off to the GO::View module, to create the image etc.

    my $goView = GO::View->new(-ontologyProvider   => $ontology,
                               -annotationProvider => $annotation,
                               -termFinder         => \@pvalues,
                               -aspect             => $conf->{'aspect'},
                               -configFile         => $confFile,
                               -imageDir           => $conf->{'outDir'},
                               -imageLabel         => "Batch GO::View",
                               -nodeUrl            => $conf->{'goidUrl'},
                               -geneUrl            => $conf->{'geneUrl'},
                               -pvalueCutOff       => $conf->{'pvalueCutOff'});

    # We now want to get the image and map that has hopefully been
    # created by the GO::View module, so we can print it to our html
    # page.  showGraph is called for its side effect only; its return
    # value is not needed here.

    if ($goView->graph) {

        $goView->showGraph;

    }

    my $resultFile = GenerateHTMLFile($file, $goView->imageMap, \@pvalues,
                                      scalar($termFinder->genesDatabaseIds),
                                      "Terms for $file");

    print $listFh a({-href => $resultFile, -target => 'result'}, $resultFile), br();

}

$listFh->close or die "Cannot close $listFile : $!";

##################################################################################
sub GenerateHTMLFile{
##################################################################################
# Writes the results page for one input file of genes into the output
# directory, and returns the name of the page (without the directory),
# suitable for use as a relative link.
#
# Arguments : $file       - name of the input file of genes
#             $map        - image map html from GO::View (may be undef)
#             $pvaluesRef - reference to the array returned by findTerms
#             $numGenes   - number of genes that were analyzed
#             $title      - title for the page
#
# Dies if the page cannot be created or closed.

    my ($file, $map, $pvaluesRef, $numGenes, $title) = @_;

    # work out name of html file

    my $htmlFile = $file;

    # delete anything up to and including the last slash

    $htmlFile =~ s/.*\///;

    # delete the last period and anything following it (but leave a
    # leading period alone, so we never end up with an empty name);
    # only the final extension goes, so that a.b.txt and a.c.txt do
    # not both end up as a.html

    $htmlFile =~ s/(?<=.)\.[^.]*\z//;

    # now add an html suffix

    $htmlFile .= ".html";

    my $fullHtmlFile = $conf->{'outDir'}.$htmlFile;

    my $htmlFh = IO::File->new($fullHtmlFile, q{>})
        or die "Cannot make $fullHtmlFile : $!";

    print $htmlFh start_html(-title => $title);

    print $htmlFh center(h2($title)), hr();

    print $htmlFh $map if defined $map;

    my $numRows = $report->print(pvalues      => $pvaluesRef,
                                 aspect       => $conf->{'aspect'},
                                 numGenes     => $numGenes,
                                 totalNum     => $conf->{'totalNumGenes'},
                                 fh           => $htmlFh,
                                 pvalueCutOff => $conf->{'pvalueCutOff'},
                                 geneUrl      => $conf->{'geneUrl'},
                                 goidUrl      => $conf->{'goidUrl'});

    if ($numRows == 0){

        # the message has to be inside the font element to come out red

        print $htmlFh h4(font({-color => 'red'},
                              center("There were no GO nodes exceeding the p-value cutoff of $conf->{'pvalueCutOff'} for the genes in $file.")));

    }

    print $htmlFh end_html;

    $htmlFh->close or die "Cannot close $fullHtmlFile : $!";

    return ($htmlFile);

}

##################################################################################
sub ReadConfFile{
##################################################################################
# Reads the "param = value" entries from the conf file, fills in
# defaults, and returns a reference to a hash of the settings.
#
# Lines starting with '#', blank lines, and lines without a value are
# ignored.  annotationFile, ontologyFile and aspect are required;
# relative annotationFile, ontologyFile and outDir entries are
# interpreted relative to the directory of the conf file, and outDir
# is always returned with a trailing slash.
#
# Dies if the file cannot be opened, or a required entry is missing.

    my $confFile = shift;

    my %conf;

    my $confFh = IO::File->new($confFile, q{<})
        or die "cannot open $confFile : $!";

    while (my $line = <$confFh>){

        next if $line =~ /^\s*\#/; # skip comment lines

        chomp $line;

        next if $line =~ /^\s*$/; # skip blank lines, or those without content

        # the param is everything before the first '=', so that values
        # (e.g. urls) may themselves contain '='; surrounding whitespace,
        # including a trailing carriage return, is not part of the value

        next unless $line =~ /^\s*([^\s=]+)\s*=\s*(\S.*?)\s*$/;

        my ($param, $value) = ($1, $2);

        $conf{$param} = $value;

    }

    $confFh->close;

    if (!exists $conf{'annotationFile'} || !defined $conf{'annotationFile'}){

        die "Your conf file must specify an annotation file entry.";

    }elsif (!exists $conf{'ontologyFile'} || !defined $conf{'ontologyFile'}){

        die "Your conf file must specify an ontology file entry.";

    }elsif (!exists $conf{'aspect'} || !defined $conf{'aspect'}){

        die "Your conf file must specify an aspect entry.";

    }

    if (!exists $conf{'totalNumGenes'} || !defined $conf{'totalNumGenes'}){

        $conf{'totalNumGenes'} = ""; # simply make it the empty string for now

    }

    if (!exists $conf{'outDir'} || !defined $conf{'outDir'}){

        $conf{'outDir'} = ""; # set to empty string for now

    }

    $conf{'geneUrl'}      ||= "";
    $conf{'goidUrl'}      ||= "";
    $conf{'pvalueCutOff'} ||= 1;
    $conf{'calculateFDR'} ||= 0;

    # now make sure that file paths are treated relative to the conf file

    my $confDir = "./"; # default

    if ($confFile =~ /(.+)\//){

        $confDir = $1."/"; # adjust if necessary

    }

    foreach my $file ($conf{'annotationFile'}, $conf{'ontologyFile'}, $conf{'outDir'}){

        # $file is an alias for the hash entry

        if ($file !~ /^\//){ # if it's not an absolute path

            $file = $confDir.$file; # add the confDir on the front

        }

    }

    # output file names are appended directly to outDir, so it has to
    # end with a slash

    $conf{'outDir'} .= "/" if $conf{'outDir'} !~ /\/\z/;

    # return a reference to the hash

    return \%conf;

}

##################################################################################
sub GenerateFrameset{
##################################################################################
# Writes batchGOView.html into the output directory: a frameset with
# the list of results on the left, and a frame named 'result' on the
# right, in which the page for a clicked link is shown.
#
# Dies if the file cannot be created or closed.

    # start an index file that a user can use to browse the output data,
    # using frames

    my $framesFile = $conf->{'outDir'}."batchGOView.html";

    my $framesFh = IO::File->new($framesFile, q{>})
        or die "Cannot create $framesFile : $!";

    print $framesFh frameset({-cols         => "100, *",
                              -marginheight => '0',
                              -marginwidth  => '0',
                              -frameborder  => '1',
                              -border       => '1'},

                             frame({'-name'       => "list",
                                    -src          => "batchGOViewList.html",
                                    -marginwidth  => 0,
                                    -marginheight => 0,
                                    -border       => 1}),

                             frame({'-name'       => 'result',
                                    -marginwidth  => 0,
                                    -marginheight => 0,
                                    -border       => 1}));

    $framesFh->close or die "Cannot close $framesFile : $!";

    return;

}

=pod

=head1 AUTHOR

Gavin Sherlock

sherlock@genome.stanford.edu

=cut