#!/usr/bin/perl
use strict;
use warnings;
use diagnostics;
use Test;
BEGIN { plan tests => 15 };
# File : GO-TermFinder-Native.t
# Author : Ihab A.B. Awad
# Date Begun : October 13th 2004
# $Id: GO-TermFinder-Native.t,v 1.2 2007/03/18 01:37:14 sherlock Exp $
# This file tests the native math functions in module GO::TermFinder::Native.
use GO::TermFinder::Native;
$| = 1;
my $d = GO::TermFinder::Native::Distributions->new(8192);
# check that the logfactorial is okay
# first calculate factorials from 0 through 10
my @factorials = (1, 1); # initialize for 0 and 1
for (my $i = 2; $i <= 10; $i++){
$factorials[$i] = $factorials[$i-1] * $i;
}
# now check them against the log values from the Distributions object
for (my $i = 0; $i <= 10; $i++){
ok(log($factorials[$i]), $d->logFactorial($i));
}
# now check that the __logNCr method is working correctly
# test that we get the correct value as if 6 had been chosen out of 10,
# given that:
#
# n!
# nCr = ---------
# r! (n-r)!
{ # lexically scope, to prevent collision between this $n and the $n
# used below
my $n = 10;
my $r = 6;
my $nChooseR = $factorials[$n] / ($factorials[$r] * $factorials[$n-$r]);
# now check against the log value that the TermFinder will return
ok($d->logNCr($n, $r), log($nChooseR));
}
# now let's test that the hypergeometric function works correctly
#
# we'll do a simple test for the probability of picking 3 out of 5,
# given that in the population there is 4 out of 10
#
# The calculation is the probability of picking x positives from a
# sample of n, given that there are M positives in a population of N.
#
# The value is calculated as:
#
# (M choose x) (N-M choose n-x)
# P = -----------------------------
# N choose n
#
my $M = 4;
my $N = 10;
my $n = 5;
my $x = 3;
my $a = $factorials[$M] / ($factorials[$x] * $factorials[$M-$x]);
my $b = $factorials[$N - $M] / ($factorials[$n - $x] * $factorials[($N - $M) - ($n - $x)]);
my $c = $factorials[$N] / ($factorials[$n] * $factorials[$N-$n]);
my $probability = ($a * $b) / $c;
ok($probability, $d->hypergeometric($x, $n, $M, $N));
# now we want to check the pvalue using the hypergeometric
#
# the pvalue is the probability of getting x or more from a sample of
# n, given M positives in a population of N
#
# We'll use the same example as above, and calculate the pvalue for 3
# of 5, given 4 of 10 in the population
my $pvalue = 0;
for (my $i = $x; $i <= $n; $i++){
my $a = $factorials[$M] / ($factorials[$i] * $factorials[$M-$i]);
my $b = $factorials[$N - $M] / ($factorials[$n - $i] * $factorials[($N - $M) - ($n - $i)]);
my $c = $factorials[$N] / ($factorials[$n] * $factorials[$N-$n]);
my $probability = ($a * $b) / $c;
$pvalue += $probability;
}
# because the GO::TermFinder::Native::Distributions module uses log
# space internally to calculate factorials and nChooseR, it is not as
# precise as this test-suite. Thus we need to reduce the precision a
# little.
#
# Should get GO::TermFinder to use BigInt sometime....
$pvalue = sprintf("%.8f", $pvalue);
my $test = sprintf("%.8f", $d->pValueByHypergeometric($x, $n, $M, $N));
ok($pvalue, $test);
$pvalue = 0;
for (my $i = 0; $i < $x; $i++){
my $a = $factorials[$M] / ($factorials[$i] * $factorials[$M-$i]);
my $b = $factorials[$N - $M] / ($factorials[$n - $i] * $factorials[($N - $M) - ($n - $i)]);
my $c = $factorials[$N] / ($factorials[$n] * $factorials[$N-$n]);
my $probability = ($a * $b) / $c;
$pvalue += $probability;
}
$pvalue = 1 - $pvalue;
$pvalue = sprintf("%.8f", $pvalue);
ok($pvalue, $test);