#!/usr/bin/perl

=pod

=head1 NAME

umls-senserelate-ttest.pl - This program calculates the 
pairwise t-test of two info files. 

=head1 SYNOPSIS

This program calculates the statistical significance of the difference 
between two info files created by the umls-senserelate-evaluation.pl 
program with the --info option set. The program uses the 
Statistics::TTest CPAN module. 

=head1 USAGE

Usage: umls-senserelate-ttest.pl [OPTIONS] FILE1 FILE2

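=head2 OUTPUT

The program prints the report generated by the Statistics::TTest 
output_t_test method, followed by the t statistic, the degrees of 
freedom, the p-value, and the status of the null hypothesis.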

=head2 Required Options

=head3 FILE(1|2)

This is a file created using the --info option when running the 
umls-senserelate-evaluation.pl program. 

=head2 General Options:

=head3 --version

Displays the version information.

=head3 --help

Displays the help information.

=head1 SYSTEM REQUIREMENTS

=over

=item * Perl (version 5.8.5 or better) - http://www.perl.org

=back

=head1 CONTACT US
   
  If you have any trouble installing and using UMLS-Similarity, 
  please contact us via the users mailing list :
    
      umls-similarity@yahoogroups.com
     
  You can join this group by going to:
    
      http://tech.groups.yahoo.com/group/umls-similarity/
     
  You may also contact us directly if you prefer :
    
      Bridget T. McInnes: bthomson at umn.edu 

      Ted Pedersen : tpederse at d.umn.edu

=head1 AUTHOR

 Bridget T. McInnes, University of Minnesota

=head1 COPYRIGHT

Copyright (c) 2013,

 Bridget T. McInnes, University of Minnesota Twin Cities
 bthomson at umn.edu
    
 Ted Pedersen, University of Minnesota Duluth
 tpederse at d.umn.edu
 
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to:

 The Free Software Foundation, Inc.,
 59 Temple Place - Suite 330,
 Boston, MA  02111-1307, USA.

=cut

###############################################################################

#                               THE CODE STARTS HERE
###############################################################################

#                           ================================
#                            COMMAND LINE OPTIONS AND USAGE
#                           ================================


use Statistics::TTest;
use Getopt::Long;

use strict;
use warnings;

#  Parse the command line options into lexical variables. GetOptions
#  reports any unknown option on STDERR itself and returns false, in
#  which case the program stops with a short pointer to that message.
my $opt_help;
my $opt_version;

GetOptions(
    "version" => \$opt_version,
    "help"    => \$opt_help,
) or die ("Please check the above mentioned option(s).\n");

#  if help is requested, print out help and stop (checked before
#  --version, so --help wins when both are given)
if( defined $opt_help ) {
    &showHelp();
    exit;
}

#  if version is requested, show version and stop
if( defined $opt_version ) {
    &showVersion();
    exit;
}

#  Exactly two info files are read from the command line, so at least
#  two arguments must remain once the options have been removed.
if(scalar(@ARGV) < 2) {
    print STDERR "Two info files must be given on the command line.\n";
    &minimalUsageNotes();
    #  minimalUsageNotes() ends the program itself; this exit only
    #  guards against falling through into the processing code below.
    exit;
}

#  The two info files to compare, taken from the remaining command
#  line arguments.
my $file1 = shift; 
my $file2 = shift;

#  Scores of each file, keyed by the first column of the info file.
my $scores1 = read_info_file($file1);
my $scores2 = read_info_file($file2);

#  Keep only the keys present in both files, so that element i of
#  @Data1 and element i of @Data2 always belong to the same key.
my @Data1 = (); 
my @Data2 = (); 

foreach my $tw (sort keys %{$scores1}) { 
    next unless exists $scores2->{$tw};
    push @Data1, $scores1->{$tw}; 
    push @Data2, $scores2->{$tw}; 
}

#  A t-test needs at least two observations per sample (the variance
#  of a single value is undefined), so stop with a clear message
#  instead of handing empty or single-element data to the module.
if(scalar(@Data1) < 2) {
    die "Fewer than two entries are shared by ($file1) and ($file2); "
      . "the t-test cannot be calculated.\n";
}

my $ttest = Statistics::TTest->new();
$ttest->set_significance(95);
$ttest->load_data(\@Data1,\@Data2);
$ttest->output_t_test();

#  Short summary printed after the module's own report.
my $t    = $ttest->t_statistic;
my $df   = $ttest->df;
my $prob = $ttest->{t_prob};
my $test = $ttest->null_hypothesis();
print "t=$t (df = $df); p-value = $prob\n"; 
print "Null hypothesis is $test\n";

##############################################################################
#  function to read one info file
#
#  Takes the name of an info file and returns a reference to a hash
#  that maps the first whitespace-separated column of each accepted
#  line to its second column. Lines without a second column, or whose
#  second column does not look like a score, are skipped. Dies if the
#  file cannot be opened.
##############################################################################
sub read_info_file {
    my ($file) = @_;

    my %score_of = ();

    #  Three-argument open: the name is always treated as a plain file
    #  to read, whatever characters it contains.
    open(my $fh, '<', $file) or die "Could not open file ($file): $!\n";

    while(my $line = <$fh>) { 
        chomp $line;
        my($tw, $acc) = split /\s+/, $line;

        #  blank lines and single-column lines carry no score
        next unless defined $acc;

        #  NOTE(review): this pattern is unanchored and needs "0."
        #  followed by a digit, so a score written as "1", "1.0" or "0"
        #  is skipped - confirm against the --info file format.
        if($acc=~/0\.[0-9]+/) { 
            $score_of{$tw} = $acc;
        }
    }
    close $fh;

    return \%score_of;
}


##############################################################################
#  function to output minimal usage notes
#
#  Writes the one-line usage summary to STDOUT, lets askHelp() point
#  the user at the --help option, and then ends the program. It takes
#  no arguments and never returns to its caller.
##############################################################################
sub minimalUsageNotes {

    my $usage = "Usage: umls-senserelate-ttest.pl [OPTIONS] FILE1 FILE2\n";
    print $usage;

    askHelp();

    exit;
}

##############################################################################
#  function to output help messages for this program
#
#  Prints a short description of the program, its usage line and the
#  general options to STDOUT. It takes no arguments and does not exit;
#  the caller decides what happens after the help text is shown.
##############################################################################
sub showHelp {

    #  what the program does
    print "This is a utility that calculates the pairwise t-test of two info\n";
    print "files created by umls-senserelate-evaluation.pl with the --info option.\n\n";

    print "Usage: umls-senserelate-ttest.pl [OPTIONS] FILE1 FILE2\n\n";

    print "\n\nGeneral Options:\n\n";

    print "--version                Prints the version number\n\n";

    print "--help                   Prints this help message.\n\n";

}

##############################################################################
#  function to output the version number
#
#  Prints the revision-control identification string of this script,
#  followed by the copyright line, to STDOUT. Takes no arguments.
##############################################################################
sub showVersion {

    #  single quotes keep the $Id ... $ keyword from being interpolated
    my $rcs_id = '$Id: umls-senserelate-ttest.pl,v 1.2 2013/05/23 17:53:33 btmcinnes Exp $';

    print $rcs_id . "\nCopyright (c) 2013, Ted Pedersen & Bridget McInnes\n";
}

##############################################################################
#  function to output "ask for help" message when user's goofed
#
#  Writes a one-line hint naming the --help option to STDERR. Takes no
#  arguments.
##############################################################################
sub askHelp {

    my $hint = "Type umls-senserelate-ttest.pl --help for help.\n";

    print {*STDERR} $hint;
}