#! /usr/bin/perl -w
#
# rankFormat.pl version 2.05
# (Last updated $Id: rankFormat.pl,v 1.6 2008/05/30 23:12:43 sidz1979 Exp $)
#
# -----------------------------------------------------------------------------

use strict;
use warnings;

use Getopt::Long;

# A plain decimal number with optional sign, fraction and exponent.  Used to
# validate the score column so that junk text is never ranked as 0.00.
my $NUMBER_RE = qr/\A [+-]? (?: \d+ \.? \d* | \. \d+ ) (?: [eE] [+-]? \d+ )? \z/x;

# Run the program only when this file is executed directly.  When the file is
# loaded with require/do, only the subroutines are defined.
main() unless caller;

# ----------------- Subroutines Start Here ----------------------

# Program entry point: handles the command-line options, reads the word
# pairs and their scores from the file(s) named on the command line (or from
# STDIN when none are given) and writes the ranked list to STDOUT.
sub main {
    my ($want_version, $want_help);

    # GetOptions reports the offending option on STDERR by itself; we only
    # add the usage notes and stop instead of silently carrying on.
    if (!GetOptions('version' => \$want_version, 'help' => \$want_help)) {
        minimalUsageNotes();
        exit 1;
    }

    # If the version information has been requested...
    if ($want_version) {
        printVersion();
        exit;
    }

    # If detailed help has been requested...
    if ($want_help) {
        printHelp();
        exit;
    }

    # Map "word1<>word2<>" => score (two decimals).  A pair that occurs more
    # than once keeps the score of its last occurrence.
    my %score_for;
    while (my $line = <>) {
        my ($key, $score) = parse_line($line);
        if (!defined $key) {
            # Blank lines are skipped quietly; anything else is reported.
            if ($line =~ /\S/) {
                (my $shown = $line) =~ s/\s+\z//;
                warn "rankFormat.pl: skipping malformed input line $.: $shown\n";
            }
            next;
        }
        $score_for{$key} = $score;
    }

    # Header line of the output.  This program has always written a literal
    # 0 here.  NOTE(review): presumably the slot where the Text::NSP format
    # carries a total count -- confirm against rank.pl before changing it.
    print "0\n";
    print "$_\n" for rank_lines(\%score_for);

    return;
}

# Subroutine to parse one line of input of the form "word1 word2 score".
# Leading/trailing whitespace (including CR, LF and FF) is ignored and any
# fields after the third are discarded.
# Returns the list ("word1<>word2<>", score formatted as "%.2f"), or an
# empty list if the line has fewer than three fields or the third field is
# not a number.
sub parse_line {
    my ($line) = @_;

    # split ' ' skips leading whitespace and splits on runs of whitespace.
    my ($first, $second, $score) = split ' ', $line;

    return unless defined $score && $score =~ $NUMBER_RE;
    return ("$first<>$second<>", sprintf('%.2f', $score));
}

# Subroutine to turn a reference to a hash of "word1<>word2<>" => score into
# the list of ranked output lines (without trailing newlines), highest score
# first.  Pairs with equal scores share the rank of the first of them and
# the following rank numbers are skipped (e.g. 1, 2, 2, 4).
sub rank_lines {
    my ($score_for) = @_;

    # The secondary sort on the key makes the order of tied pairs
    # reproducible; hash order alone differs from run to run.
    my @ordered = sort { $score_for->{$b} <=> $score_for->{$a} or $a cmp $b }
                  keys %{$score_for};

    my @lines;
    my $position = 0;    # 1-based position of the current pair in the list
    my $rank     = 1;    # rank shared by the current run of equal scores
    my $previous;        # score of the pair handled just before this one

    for my $key (@ordered) {
        $position++;
        my $score = $score_for->{$key};
        $rank = $position if defined $previous && $score != $previous;
        push @lines, "$key$rank $score 0 0 0";
        $previous = $score;
    }

    return @lines;
}

# Subroutine to print detailed help
sub printHelp {
    printUsage();
    print "\nProgram to rank the output of similarity.pl based on the semantic\n",
          "relatedness of the word pairs. The output is in a format that can be\n",
          "processed by the rank.pl program of the Text::NSP package. The rank.pl\n",
          "program takes two lists of ranked pairs of words and computes the\n",
          "correlation between them according to Spearman's correlation\n",
          "coefficient. The input can be specified in file(s) on the command line,\n",
          "or if no file(s) are specified, then STDIN is assumed. Output is written\n",
          "to STDOUT.\n\n",
          "Options: \n",
          "--help Displays this help screen.\n",
          "--version Displays version information.\n\n";
    return;
}

# Subroutine to print minimal usage notes
sub minimalUsageNotes {
    printUsage();
    print "Type rankFormat.pl --help for detailed help.\n";
    return;
}

# Subroutine that prints the usage
sub printUsage {
    print "Usage: rankFormat.pl [FILE... |--help | --version]\n";
    return;
}

# Subroutine to print the version information
sub printVersion {
    print "rankFormat.pl version 2.05\n";
    print "Copyright (c) 2006-2008, Ted Pedersen and Siddharth Patwardhan.\n";
    return;
}

__END__

=head1 NAME

rankFormat.pl - Measure the correlation between two different rankings of
word pairs measured for semantic relatedness.

=head1 SYNOPSIS

 rankFormat.pl [FILE... | --help | --version]

=head1 DESCRIPTION

Program to rank the output of similarity.pl based on the semantic
relatedness of the word pairs. The output is in a format that can be
processed by the rank.pl program of the L<Text::NSP> package. The rank.pl
program takes two lists of ranked pairs of words and computes the
correlation between them according to Spearman's correlation coefficient.
This can be used for comparing the output of two different measures on the
same set of word pairs, or for comparing the output of similarity.pl with a
human created gold standard (such as the Miller and Charles 30 word pair
data).

=head1 OPTIONS

B<--help>

    Displays a detailed usage message

B<--version>

    Displays version information.

=head1 BUGS

Report to WordNet::Similarity mailing list :
 L<http://groups.yahoo.com/group/wn-similarity>

=head1 SEE ALSO

WordNet home page :
 L<http://wordnet.princeton.edu>

WordNet::Similarity home page :
 L<http://wn-similarity.sourceforge.net>

More info about Text::NSP
 L<http://ngram.sourceforge.net>

=head1 AUTHORS

 Ted Pedersen, University of Minnesota Duluth
 tpederse at d.umn.edu

 Siddharth Patwardhan, University of Utah, Salt Lake City
 sidd at cs.utah.edu

=head1 COPYRIGHT

Copyright (c) 2006-2008, Ted Pedersen and Siddharth Patwardhan

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

=cut