#! /usr/local/bin/perl -w
#
# compounds.pl version 2.04
# (Last updated $Id: compounds.pl,v 1.12 2008/04/13 09:27:52 sidz1979 Exp $)
#
# -----------------------------------------------------------------------------
#
# Prints, one per line, every word in the WordNet noun, verb, adjective and
# adverb index files whose lemma contains an underscore (i.e. a compound).

use strict;
use warnings;
use Getopt::Long;

# Number of leading lines skipped in each index file before entries are read.
# NOTE(review): this matches the license header length of the WordNet index
# files this script was written against -- confirm for other releases.
use constant HEADER_LINES => 29;

# Now get the options!
my ($opt_version, $opt_help, $opt_wnpath);
my $options_ok = GetOptions(
    "version"  => \$opt_version,
    "help"     => \$opt_help,
    "wnpath=s" => \$opt_wnpath,
);

# Getopt::Long has already warned about the offending option on STDERR;
# point the user at --help and stop instead of running with a bad command line.
if (!$options_ok) {
    minimalUsageNotes();
    exit 1;
}

# If the version information has been requested
if (defined $opt_version) {
    printVersion();
    exit;
}

# If detailed help has been requested
if (defined $opt_help) {
    printHelp();
    exit;
}

# Check if path to WordNet Data files has been provided ... If so ... save it.
# Precedence: --wnpath, then $WNSEARCHDIR, then $WNHOME/dict, then defaults.
my ($wnPCPath, $wnUnixPath);
if (defined $opt_wnpath) {
    $wnPCPath   = $opt_wnpath;
    $wnUnixPath = $opt_wnpath;
}
elsif (defined $ENV{WNSEARCHDIR}) {
    $wnPCPath   = $ENV{WNSEARCHDIR};
    $wnUnixPath = $ENV{WNSEARCHDIR};
}
elsif (defined $ENV{WNHOME}) {
    $wnPCPath   = $ENV{WNHOME} . "\\dict";
    $wnUnixPath = $ENV{WNHOME} . "/dict";
}
else {
    $wnPCPath   = "C:\\Program Files\\WordNet\\3.0\\dict";
    $wnUnixPath = "/usr/local/WordNet-3.0/dict";
}

# Open all four index files up front so that a missing file aborts the run
# before any output has been written.  For each part of speech the Unix-style
# name is tried first, then the PC-style name.
my @indexHandles = (
    openIndexFile($wnUnixPath . "/index.noun", $wnPCPath . "\\noun.idx"),
    openIndexFile($wnUnixPath . "/index.verb", $wnPCPath . "\\verb.idx"),
    openIndexFile($wnUnixPath . "/index.adj",  $wnPCPath . "\\adj.idx"),
    openIndexFile($wnUnixPath . "/index.adv",  $wnPCPath . "\\adv.idx"),
);

foreach my $indexHandle (@indexHandles) {
    printCompounds($indexHandle);
}

foreach my $indexHandle (@indexHandles) {
    close($indexHandle);
}

# Subroutine that opens the first readable file among the candidate paths.
# Arguments: list of candidate file names, tried in order.
# Returns:   a read-only lexical filehandle.
# Dies if none of the candidates can be opened.
sub openIndexFile {
    my @candidates = @_;

    foreach my $path (@candidates) {
        my $handle;
        # Three-argument open: the path is never interpreted as a mode or pipe.
        return $handle if open($handle, '<', $path);
    }

    die "Unable to open index file (tried "
        . join(", ", @candidates)
        . "): $!\n";
}

# Subroutine that prints every compound found in one index file.
# Argument: an open filehandle positioned at the start of the file.
# The first HEADER_LINES lines are discarded; for every remaining line the
# first whitespace-separated token is printed if it contains an underscore.
sub printCompounds {
    my ($handle) = @_;

    foreach (1 .. HEADER_LINES) {
        my $skipped = <$handle>;
        last unless defined $skipped;    # file shorter than the header
    }

    while (my $line = <$handle>) {
        $line =~ s/[\r\f\n]//g;
        $line =~ s/^\s+//;
        $line =~ s/\s+$//;

        my ($word) = split(/\s+/, $line, 2);
        next unless defined $word;       # blank line: nothing to report

        print "$word\n" if ($word =~ /_/);
    }

    return;
}

# Subroutine to print detailed help
sub printHelp {
    printUsage();
    print "\nThis program generates a list of all compound words found\n";
    print "in WordNet\n";
    print "Options: \n";
    print "--wnpath WNPATH specifies the path of the WordNet data files.\n";
    print " Ordinarily, this path is determined from the \$WNHOME\n";
    print " environment variable. But this option overides this\n";
    print " behavior.\n";
    print "--help Displays this help screen.\n";
    print "--version Displays version information.\n\n";
}

# Subroutine to print minimal usage notes
sub minimalUsageNotes {
    printUsage();
    print "Type compounds.pl --help for detailed help.\n";
}

# Subroutine that prints the usage
sub printUsage {
    print "compounds.pl [{ --wnpath WNPATH | --help | --version }]\n";
}

# Subroutine to print the version information
sub printVersion {
    print "compounds.pl version 2.04\n";
    print "Copyright (c) 2005-2008, Ted Pedersen, Satanjeev Banerjee, Siddharth Patwardhan and Jason Michelizzi.\n";
}

__END__

=head1 NAME

compounds.pl - extract compound words (collocations) from WordNet

=head1 SYNOPSIS

 compounds.pl [--wnpath=PATH | --help | --version]

=head1 DESCRIPTION

B<compounds.pl> extracts compound words (collocations) from WordNet and
writes the resultant list to the standard output.

=head1 OPTIONS

B<--wnpath>=I<path>

    Location of the WordNet data files (e.g.,
    /usr/local/WordNet-3.0/dict)

=head1 BUGS

Report to WordNet::Similarity mailing list :
 L<http://groups.yahoo.com/group/wn-similarity>

=head1 SEE ALSO

L<WordNet::Similarity>

WordNet::Similarity home page :
 L<http://wn-similarity.sourceforge.net>

=head1 AUTHORS

 Ted Pedersen, University of Minnesota, Duluth
 tpederse at d.umn.edu

 Satanjeev Banerjee, Carnegie Mellon University, Pittsburgh
 banerjee+ at cs.cmu.edu

 Siddharth Patwardhan, University of Utah, Salt Lake City
 sidd at cs.utah.edu

=head1 COPYRIGHT

Copyright (c) 2005-2008, Ted Pedersen, Satanjeev Banerjee, and Siddharth Patwardhan

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

=cut