#!/usr/local/bin/perl -wT # # similarity_server.pl version 2.04 # (Last updated $Id: similarity_server.pl,v 1.6 2008/04/17 08:32:00 sidz1979 Exp $) # # --------------------------------------------------------------------- use strict; use Getopt::Long; use File::Temp; use File::Spec; use WordNet::Similarity; use POSIX ':sys_wait_h'; # for waitpid() and friends; used by reaper() use POSIX qw(setsid); # to daemonize our($opt_wnhome, $opt_port, $opt_logfile, $opt_maxchild, $opt_stoplist, $opt_version, $opt_help); &GetOptions("wnhome=s", "port=i", "logfile=s", "maxchild=i", "stoplist=s", "version", "help"); # Check for version if(defined($opt_version)) { print "similarity_server.pl version 2.04\n"; print "WordNet::Similarity version ".($WordNet::Similarity::VERSION)."\n"; print "Copyright (c) 2005-2008, Ted Pedersen, Siddharth Patwardhan and Jason Michelizzi.\n"; exit; } # Check for help if(defined($opt_help)) { print "Usage: similarity_server.pl [--port PORTNUMBER] [--logfile LOGFILE] [--maxchild NUM] [--stoplist STOPFILE]\n"; print " | --help\n"; print " | --version\n"; print "\nStarts the similarity server, which listens for requests on a predefined\n"; print "port. It presents a netork interface to the WordNet::Similarity moduels.\n\n"; print "Options:\n"; print "--port Specify the port PORTNUMBER for the server to listen on.\n"; print "--logfile The output LOGFILE where any error or warning messages\n"; print " should be written out.\n"; print "--maxchild Specify the maximum number NUM of the processes that should\n"; print " be forked to handle the requests.\n"; print "--stoplist A file STOPFILE of stop words to be provided to the lesk and\n"; print " vector modules.\n"; print "--help Display this help message and quit.\n"; print "--version Display the version information and quit.\n"; exit; } # Local variables my $localport = 31134; my $error_log = undef; my $stoplist; my $maxchild = 4; # max number of child processes at one time sub reaper; # Set the log file, if specified $error_log = $1 if(defined($opt_logfile) and $opt_logfile ne "" and $opt_logfile =~ /^(.*)$/); print STDERR "Error log = ".($error_log?$error_log:"")."\n"; # Set the stop list, if specified $stoplist = $1 if(defined($opt_stoplist) and $opt_stoplist ne "" and $opt_stoplist =~ /^(.*)$/); print STDERR "Stoplist = ".($stoplist?$stoplist:"")."\n"; # Set the port $localport = $opt_port if(defined($opt_port)); print STDERR "Local port = $localport\n"; # Set the maxchild $maxchild = $opt_maxchild if(defined($opt_maxchild)); print STDERR "Maxchild = $maxchild\n"; my $lockfh = File::Temp->new(); my $lock_file = $lockfh->filename(); die "Error: Unable to create temporary lock file.\n" if(!$lockfh); print $lockfh $$; close $lockfh or die "Error: Cannot close lock file: $! \n"; print STDERR "Loading modules... "; # prototypes: sub getAllForms ($); sub getlock (); sub releaselock (); sub timestamp ($); use sigtrap handler => \&bailout, 'normal-signals'; use IO::Socket::INET; use WordNet::QueryData; use WordNet::Tools; use WordNet::Similarity::hso; use WordNet::Similarity::jcn; use WordNet::Similarity::lch; use WordNet::Similarity::lesk; use WordNet::Similarity::lin; use WordNet::Similarity::path; use WordNet::Similarity::random; use WordNet::Similarity::res; use WordNet::Similarity::vector; use WordNet::Similarity::vector_pairs; use WordNet::Similarity::wup; my $wnlocation = undef; $wnlocation = File::Spec->catfile($opt_wnhome, "dict") if(defined($opt_wnhome)); my $wn = WordNet::QueryData->new($wnlocation); $wn or die "Error: Couldn't construct WordNet::QueryData object.\n"; my $wntools = WordNet::Tools->new($wn); $wntools or die "Error: Unable to create WordNet::Tools object.\n"; our $hso = WordNet::Similarity::hso->new($wn); our $jcn = WordNet::Similarity::jcn->new($wn); our $lch = WordNet::Similarity::lch->new($wn); my $leskfh = File::Temp->new(); my $leskcfg = $leskfh->filename(); die "Error: Unable to create temporary config file.\n" if(!$leskfh); print $leskfh "WordNet::Similarity::lesk\n"; print $leskfh "stem::1\n"; print $leskfh "stop::$stoplist\n" if(defined($stoplist) && -e $stoplist); close $leskfh or die "Error: Unable to close config file.\n"; our $lesk = WordNet::Similarity::lesk->new($wn, $leskcfg); my $vectorfh = File::Temp->new(); my $vectorcfg = $vectorfh->filename(); die "Error: Unable to create temporary config file.\n" if(!$vectorfh); print $vectorfh "WordNet::Similarity::vector_pairs\n"; print $vectorfh "stop::$stoplist\n" if(defined($stoplist) && -e $stoplist); print $vectorfh "stem::1\n"; close $vectorfh or die "Error: Unable to close config file.\n"; our $vector_pairs = WordNet::Similarity::vector_pairs->new($wn, $vectorcfg); my $vectorpfh = File::Temp->new(); my $vectorpcfg = $vectorfh->filename(); die "Error: Unable to create temporary config file.\n" if(!$vectorpfh); print $vectorpfh "WordNet::Similarity::vector\n"; print $vectorpfh "stop::$stoplist\n" if(defined($stoplist) && -e $stoplist); print $vectorpfh "stem::1\n"; close $vectorpfh or die "Error: Unable to close config file.\n"; our $vector = WordNet::Similarity::vector->new($wn, $vectorpcfg); our $lin = WordNet::Similarity::lin->new($wn); our $path = WordNet::Similarity::path->new($wn); our $random = WordNet::Similarity::random->new($wn); our $res = WordNet::Similarity::res->new($wn); our $wup = WordNet::Similarity::wup->new($wn); my @measures = ($hso, $jcn, $lch, $lesk, $lin, $path, $random, $res, $wup, $vector, $vector_pairs); foreach (@measures) { my ($err, $errstr) = $_->getError(); die "$errstr died.\n" if $err; } undef @measures; # reset (untaint) the PATH $ENV{PATH} = '/bin:/usr/bin:/usr/local/bin'; print STDERR "done.\n"; print STDERR "Starting server... going into background.\n"; # Daemonize open STDIN, '/dev/null' or die "Can't read /dev/null: $! \n"; open STDOUT, '>>/dev/null' or die "Can't write to /dev/null: $! \n"; # The is the socket we listen to my $socket = IO::Socket::INET->new( LocalPort => $localport, Listen => SOMAXCONN, Reuse => 1, Type => SOCK_STREAM ) or die "Could not bind to network port: $! \n"; print STDERR "Closing output to terminal.\n"; if(defined($error_log)) { print STDERR "All future messages will be routed to the log file.\n"; if(!open(STDERR, '>>', $error_log)) { print "Error: Could open error log.\n"; die "Error: Could not re-open STDERR.\n"; } chmod 0664, $error_log; } else { print STDERR "No more messages will be printed (even if the server dies).\n"; open STDERR, '>>/dev/null' or die "Can't write to /dev/null: $! \n"; } chdir '/' or die(×tamp("Can't chdir to /: $! \n")); defined(my $pid = fork) or die(×tamp("Can't fork: $! \n")); exit if $pid; setsid or die(×tamp("Can't start a new session: $! \n")); umask 0; # this variable is incremented after every fork, and is # updated by reaper() when a child process dies my $num_children = 0; ## SEE BELOW # automatically reap child processes #$SIG{CHLD} = 'IGNORE'; ## ## BETTER WAY: # handle death of child process $SIG{CHLD} = \&reaper; my $interrupted = 0; ACCEPT: while((my $client = $socket->accept) or $interrupted) { $interrupted = 0; next unless $client; # a SIGCHLD was raised # check to see if it's okay to handle this request if($num_children >= $maxchild) { print $client "busy\015\012"; $client->close; undef $client; next ACCEPT; } my $childpid; # fork; let the child handle the actual request if($childpid = fork) { # This is the parent $num_children++; # go wait for next request undef $client; next ACCEPT; } # This is the child process defined $childpid or die(×tamp("Could not fork: $! \n")); # here we're the child, so we actually handle the request my @requests; while(my $request = <$client>) { last if $request eq "\015\012"; push @requests, $request; } foreach my $i (0..$#requests) { my $request = $requests[$i]; my $rnum = $i + 1; my $type = 'UNDEFINED'; if($request =~ m/^(\w)\b/) { $type = $1; } else { $type = 'UNDEFINED'; } if($type eq 'v') { eval { # get version information my $qdver = $wn->VERSION(); my $wnver = $wntools->hashCode(); my $simver = $WordNet::Similarity::VERSION; print $client "v WordNet $wnver\015\012"; print $client "v WordNet::QueryData $qdver\015\012"; print $client "v WordNet::Similarity $simver\015\012"; }; print(STDERR ×tamp("$@\n")) if($@); } elsif($type eq 's') { my (undef, $word) = split /\s+/, $request; if(!defined($word) || $word eq "") { print $client "! Usage: s WORD\015\012"; goto EXIT_CHILD; } my @senses = getAllForms($word); unless(scalar @senses) { print $client "! $word was not found in WordNet\015\012"; goto EXIT_CHILD; } getlock; foreach my $wps (@senses) { eval { my @synset = $wn->querySense($wps, "syns"); print $client "$rnum $wps ", join(" ", @synset), "\015\012"; }; print(STDERR ×tamp("$@\n")) if($@); } releaselock; } elsif($type eq 'g') { my $word = undef; (undef, $word) = split /\s+/, $request; if(!defined($word) || $word eq "") { print $client "! Usage: g WORD\015\012"; goto EXIT_CHILD; } my @senses = getAllForms($word); unless(scalar @senses) { print $client "! $word was not found in WordNet\015\012"; goto EXIT_CHILD; } getlock; foreach my $wps (@senses) { eval { my ($gloss) = $wn->querySense($wps, "glos"); print $client "$rnum $wps ${gloss}\015\012"; }; print(STDERR ×tamp("$@\n")) if($@); } releaselock; } elsif($type eq 'r') { my (undef, $word1, $word2, $measure, $trace, $gloss, $syns, $root)= split /\s+/, $request; unless(defined $word1 and defined $word2) { print $client "! Error: undefined input words\015\012"; sleep 2; goto EXIT_CHILD; } my $module; if($measure =~ /^(?:hso|jcn|lch|lesk|lin|path|random|res|wup|vector|vector_pairs)$/) { no strict 'refs'; $module = $$measure; unless(defined $module) { print $client "! Error: Couldn't get reference to measure\015\012"; sleep 2; goto EXIT_CHILD; } } else { print $client "! Error: no such measure $measure\015\012"; sleep 2; goto EXIT_CHILD; } my @wps1 = getAllForms($word1); unless(scalar @wps1) { print $client "! $word1 was not found in WordNet\015\012"; goto EXIT_CHILD; } my @wps2 = getAllForms($word2); unless(scalar @wps2) { print $client "! $word2 was not found in WordNet\015\012"; goto EXIT_CHILD; } if(defined($trace) and $trace eq 'yes') { $module->{trace} = 1; } $module->{rootNode} = (defined($root) and $root eq 'yes') ? 1 : 0; if((defined($gloss) and $gloss eq 'yes') or (defined($syns) and $syns eq 'yes')) { getlock; foreach my $wps ((@wps1, @wps2)) { if(defined($gloss) and $gloss eq 'yes') { eval { my ($gls) = $wn->querySense($wps, 'glos'); print $client "g $wps $gls\015\012"; }; print(STDERR ×tamp("$@\n")) if($@); } if(defined($syns) and $syns eq 'yes') { eval { my @syns = $wn->querySense($wps, 'syns'); print $client "s ", join(" ", @syns), "\015\012"; }; print(STDERR ×tamp("$@\n")) if($@); } } releaselock; } getlock; foreach my $wps1 (@wps1) { foreach my $wps2 (@wps2) { eval { my $score = $module->getRelatedness($wps1, $wps2); my ($err, $errstr) = $module->getError(); if($err) { print $client "! $errstr\015\012"; } else { print $client "r $measure $wps1 $wps2 $score\015\012"; } if(defined($trace) and $trace eq 'yes') { my $tracestr = $module->getTraceString(); $tracestr =~ s/[\015\012]+//g; print $client "t $tracestr\015\012"; } }; print(STDERR ×tamp("$@\n")) if($@); } } releaselock; # reset traces to off $module->{trace} = 0; } else { print $client "! Bad request type `$type'\015\012"; } } # Terminate ALL messages with CRLF (\015\012). Do NOT use # \r\n (the meaning of \r and \n varies on different platforms). EXIT_CHILD: print $client "\015\012"; $client->close; $socket->close; # don't let the child accept: exit; } $socket->close; exit; sub getAllForms ($) { my $word = shift; # check if it's a type III string already: return $word if $word =~ m/[^#]+\#[nvar]\#\d+/; # it must be a type I or II, so let's get all valid forms getlock; my @forms = (); eval { @forms = $wn->validForms($word); }; print(STDERR ×tamp("$@\n")) if($@); releaselock; return () unless scalar @forms; my @wps_strings; # for each valid form, get all valid wps strings foreach my $form (@forms) { # form is a type II string getlock; my @strings = (); eval { @strings = $wn->querySense($form); }; print(STDERR ×tamp("$@\n")) if($@); releaselock; next unless scalar @strings; push @wps_strings, @strings; } return @wps_strings; } # A signal handler, good for most normal signals (esp. INT). Mostly we just # want to close the socket we're listening to and delete the lock file. sub bailout { my $sig = shift; $sig = defined $sig ? $sig : "?UNKNOWN?"; $socket->close if defined $socket; print(STDERR ×tamp("Bailing out (SIG$sig).\n")); releaselock if($lockfh); exit 1; } use Fcntl qw/:flock/; # gets a lock on $lockfh. The return value is that of flock. sub getlock () { open($lockfh, '>>', $lock_file) or die(×tamp("Cannot open lock file $lock_file: $! \n")); eval{flock $lockfh, LOCK_EX;}; print(STDERR ×tamp("$@\n")) if($@); } # releases a lock on $lockfh. The return value is that of flock. sub releaselock () { eval { flock $lockfh, LOCK_UN; close $lockfh; }; print(STDERR ×tamp("$@\n")) if($@); } # attach a time stamp sub timestamp ($) { my $instring = shift; return $instring if(!defined($instring)); my @monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); my ($second, $minute, $hour, $dayOfMonth, $month, $yearOffset, $dayOfWeek, $dayOfYear) = localtime(); my $year = 1900 + $yearOffset; return "["."$dayOfMonth/$monthNames[$month]/$year:$hour:$minute:$second"."] $instring"; } # sub to reap child processes (so they don't become zombies) # also updates the num_children variable # # Sub was loosely inspired by an example at # http://www.india-seo.com/perl/cookbook/ch16_20.htm sub reaper { my $moribund; if(my $pid = waitpid(-1, WNOHANG) > 0) { $num_children-- if WIFEXITED($?); } $interrupted = 1; $SIG{CHLD} = \&reaper; # cursed be SysV } __END__ =head1 NAME similarity_server.pl - [Web] The backend WordNet::Similarity server for the Web Interface =head1 SYNOPSIS Usage: similarity_server.pl [--port PORTNUMBER] [--logfile LOGFILE] [--maxchild NUM] [--stoplist STOPFILE] | --help | --version =head1 DESCRIPTION This script implements the backend of the web interface for WordNet::Similarity. This script listens to a port waiting for a request form similarity.cgi or wps.cgi. The client script sends a message to this script as series of queries (see QUERY FORMAT). After all the queries, the client sends a message containing only CRLF (carriage-return line-feed, or \015\012). The server (this script) responds with the results (see MESSAGE FORMAT) terminated by a message containing only CRLF. =head2 Example: Client: g car#n#1CRLF CRLF Sever responds: g car#n#1 4-wheeled motor vehicle; usually propelled by an internal combustion engine; "he needs a car to get to work"CRLF CRLF =head1 OPTIONS B<--port>=I Specify the port PORTNUMBER for the server to listen on. B<--logfile>=I The output LOGFILE where any error or warning messages should be written out. B<--maxchild>=I Specify the maximum number NUM of the processes that should be forked to handle the requests. B<--stoplist>=I A file STOPFILE of stop words to be provided to the lesk and vector modules. B<--help> Display the help message and quit. B<--version> Display the version information and quit. =head1 QUERY FORMAT means carriage-return line-feed "\r\n" on Unix, "\n\r" on Macs, \015\012 everywhere and anywhere (i.e., don't use \n or \r, use \015\012). The queries consist of messages in the following formats: s - server will return all senses of word1 and word2 g - server will return the gloss for each synset to which word belongs r - server will return the relatedness of wps1 and wps2 using measure. v - get version information =head1 MESSAGE FORMAT The messages sent from this server will be in the following formats: ! - indicates an error or warning g - the gloss of wps r - the relatedness score of wps1 and wps2 t - the trace output for the previous relatedness score s ... - a synset v - the version of 'package' being used =head1 BUGS Report to WordNet::Similarity mailing list : L =head1 SEE ALSO L WordNet home page : L WordNet::Similarity home page (provides link to web interface): L =head1 AUTHORS Ted Pedersen, University of Minnesota, Duluth tpederse at d.umn.edu Siddharth Patwardhan, University of Utah sidd at cs.utah.edu Jason Michelizzi =head1 COPYRIGHT Copyright (c) 2005-2008, Ted Pedersen, Siddharth Patwardhan and Jason Michelizzi This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to: The Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. =cut