#!/usr/bin/perl -w

use warnings;
use strict;

use IO::Socket;
use WordNet::QueryData;
use WordNet::Tools;
use WordNet::SenseRelate::AllWords;
use WordNet::Similarity;
use Getopt::Long;

#........................................................................
#
# allwords_server.pl
#
# Listens on a TCP port, reads one request per connection as a sequence of
# text lines, and either reports component versions or disambiguates the
# sentences found in the client's context file with
# WordNet::SenseRelate::AllWords. Results are written to the client, to a
# per-user results file and to the server log.
#
# The file handles LFH (log), CFH (context), SFH (stoplist), RFH (results)
# and TFH (trace) are package-global barewords on purpose: they are opened
# in one branch of the request loop and used or closed in another.
#
#........................................................................

# Command-line configurable settings.
my $wnlocation = '/usr/local/WordNet-3.0/dict';
my $localhost  = '127.0.0.1';    # only written to the log, see the socket note
my $localport  = 32323;
my $logfile;
my $help;
my $version;

# Stoplist configuration files chosen according to the requested measure.
my $leskconfigfile   = "./user_data/lesk-stoplist.conf";
my $vectorconfigfile = "./user_data/vector-stoplist.conf";

# Request state. These are deliberately file-scoped: a value sent by one
# client stays in effect for later clients until it is sent again.
# NOTE(review): that carry-over looks accidental rather than designed --
# confirm before relying on it.
my $contextfile = "./user_data/tmp_client_input.txt";
my $windowSize;
my $format;
my $scheme;
my %options;
my $istagged    = 0;
my $showversion = 0;
my $usr_dir;
my $tracefilename;
my $resultfilename;
my $doc_base = "../../htdocs/allwords/user_data";    # only ever logged here

# Suffixes that AllWords.pm attaches to a word it could not assign a sense
# to, paired with the text reported for them. Order matters: the first
# matching pattern wins. The spacing before ':' differs between entries and
# is part of the output format the client receives.
my @suffix_messages = (
    [ qr/\#o/,  ' : stopword' ],
    [ qr/\#ND/, ' : not in WordNet' ],
    [ qr/\#NR/, ': No relatedness found with the surrounding words' ],
    [ qr/\#IT/, ': Invalid Tag' ],
    [ qr/\#NT/, ': No Tag' ],
    [ qr/\#CL/, ': Closed Class Word' ],
    [ qr/\#MW/, ': Missing Word' ],
);

my $ok = GetOptions(
    'logfile=s'    => \$logfile,
    'wnlocation=s' => \$wnlocation,
    'port=i'       => \$localport,
    help           => \$help,
    version        => \$version,
);
$ok or exit 1;

if ($help) {
    showUsage("Long");
    exit;
}

if ($version) {
    print "allwords_server.pl - WordNet::SenseRelate::AllWords web interface server\n";
    print 'Last modified by : $Id: allwords_server.pl,v 1.39 2009/05/27 19:56:57 kvarada Exp $';
    print "\n";
    exit;
}

unless (defined $logfile) {
    print STDERR "The --logfile argument is required. This is the logfile path for allwords_server.pl log\n";
    showUsage();
    exit 1;
}

# Three-argument open so that a logfile name can never be read as a mode.
# If the file cannot be opened the server still starts (as before), but the
# log is redirected to STDERR instead of being printed to an unopened handle.
if (open LFH, '>>', $logfile) {
    print "\nWriting log in $logfile";
}
else {
    print "\nCannot open $logfile for writing: $!";
    open LFH, '>&', \*STDERR
        or die "\nCannot fall back to STDERR for logging: $!";
}

# The accept loop below normally ends only when the process is killed, so
# buffered log output would be lost; make the log handle unbuffered.
{
    my $previous = select(LFH);
    $| = 1;
    select($previous);
}

print LFH "WordNet Location => $wnlocation\n";

#......................................
#
# Creating WordNet::QueryData object.
#
#......................................
my $qd = WordNet::QueryData->new($wnlocation);
$qd or die "\nCouldn't construct WordNet::QueryData object";
print LFH "\nWordNet::QueryData object sucessfully created";

#........................................................................
#
# Compoundifying is done using compoundify method of WordNet::Tools.
#
#........................................................................
my $wntools = WordNet::Tools->new($qd);
$wntools or die "\nCouldn't construct WordNet::Tools object";
print LFH "\nWordNet::SenseRelate::Tools object sucessfully created";

# NOTE(review): no LocalAddr is passed, so the socket is not restricted to
# $localhost even though that address is what gets logged below -- confirm
# whether binding to the loopback address only was intended.
my $sock = IO::Socket::INET->new(
    LocalPort => $localport,
    Listen    => SOMAXCONN,
    Reuse     => 1,
    Type      => SOCK_STREAM,
) or die "Could not bind to network port: $! \n";

print LFH "\nSocket created with following details \nLocalHost => $localhost\nLocalPort => $localport\nProto => tcp";
print LFH "\n[Server $0 accepting clients]\n";

CLIENT:
while (my $client = $sock->accept()) {
    $client->autoflush(1);
    print LFH "\nClient $client is accepted\n";

    # Per-connection state.
    %options = (wordnet => $qd, wntools => $wntools);
    my @sentences    = ();
    my $text         = "";
    my $contextflag  = 0;    # 1 while context lines are being received
    my $stoplistflag = 0;    # 1 while stoplist lines are being received

    # NOTE(review): every protocol-tag pattern in the chain below is
    # reproduced exactly as it appears in this copy of the file: /:/ for the
    # "tag:value" lines and an empty // for the marker lines. The script's
    # POD says the tags use the characters '<' and '>', so the tag names
    # appear to have been stripped from this copy (TODO confirm against
    # allwords.cgi and version.cgi). As written, the first /:/ branch
    # shadows every later /:/ branch and // re-uses the last successfully
    # matched pattern. Restore the real tag names before deploying.
    LINE:
    while (defined(my $line = <$client>)) {
        chomp($line);

        # $tokens[1] is the text between the first and second ':'.
        my @tokens = split(/:/, $line);

        if ($line =~ /:/) {
            # Version request: report component versions, then hang up.
            my $qdver       = $qd->VERSION();
            my $wnver       = $wntools->hashCode();
            my $simver      = $WordNet::Similarity::VERSION;
            my $allwordsver = $WordNet::SenseRelate::AllWords::VERSION;

            print LFH "\nv WordNet $wnver";
            print LFH "\nv WordNet::QueryData $qdver";
            print LFH "\nv WordNet::Similarity $simver";
            print LFH "\nv WordNet::SenseRelate::AllWords $allwordsver";

            print $client "v WordNet $wnver\n";
            print $client "v WordNet::QueryData $qdver\n";
            print $client "v WordNet::Similarity $simver\n";
            print $client "v WordNet::SenseRelate::AllWords $allwordsver\n";

            $showversion = 1;
            print LFH "\nShow verrion flag => $showversion";
            close($client);
            last LINE;
        }
        elsif ($line =~ //) {
            # Start of the context text: append the following lines to
            # the context file.
            $contextflag = 1;
            open(CFH, '>>', $contextfile)
                or die "Cannot open $contextfile : $!";
        }
        elsif ($line =~ //) {
            # End of the context text.
            $contextflag = 0;
            close CFH;
        }
        elsif ($contextflag == 1 && $line =~ //) {
            # One line of context text.
            print CFH $tokens[1];
            print CFH "\n";
        }
        elsif ($line =~ /:/) {
            $doc_base = $tokens[1];
            print LFH "\nDocument Base => $doc_base";
        }
        elsif ($line =~ /:/) {
            # Per-user working directory; all per-request files live in it.
            $usr_dir = $tokens[1] . "_server";
            print LFH "\nUser Directory => $usr_dir";

            # Use the mkdir builtin rather than system("mkdir ..."): the
            # name comes from the client and must never reach a shell.
            # NOTE(review): the path itself is still not validated.
            if (mkdir $usr_dir) {
                print LFH "\n created dir $usr_dir.";
            }
            else {
                print LFH "\nDir already present or error creating dir $usr_dir";
            }

            $contextfile    = $usr_dir . "/context.txt";
            $tracefilename  = $usr_dir . "/trace.txt";
            $resultfilename = $usr_dir . "/results.txt";
            print LFH "\nTrace file name => $tracefilename";
        }
        elsif ($line =~ /:/) {
            # The context file is complete: read it back and split it into
            # sentences, one per non-empty line.
            $showversion = 0;
            print LFH "\nContextfile => $contextfile";
            open(CFH, '<', $contextfile)
                or die "Cannot open $contextfile: $!";
            while (my $chunk = <CFH>) {
                $text .= $chunk;
            }
            close CFH;
            $text =~ s/\r+//g;
            @sentences = split(/\n+/, $text);
        }
        elsif ($line =~ /:/) {
            $windowSize = $tokens[1];
            print LFH "\nWindow Size => $windowSize";
        }
        elsif ($line =~ /:/) {
            $format   = $tokens[1];
            $istagged = ($format eq 'tagged') ? 1 : 0;
            print LFH "\nformat => $format";

            my $tagged_note = $istagged ? "YES" : "NO";
            print LFH "\ntagged text => $tagged_note";

            $options{wnformat} = 1 if $format eq 'wntagged';
            my $wntagged_note = $options{wnformat} ? "YES" : "NO";
            print LFH "\nwntagged text => $wntagged_note";
        }
        elsif ($line =~ /:/) {
            $scheme = $tokens[1];
            print LFH "\nscheme => $scheme";
        }
        elsif ($line =~ /:/) {
            $options{trace} = $tokens[1];
        }
        elsif ($line =~ /:/) {
            $options{pairScore} = $tokens[1];
        }
        elsif ($line =~ /:/) {
            $options{forcepos} = 1;
        }
        elsif ($line =~ /:/) {
            $options{nocompoundify} = 1;
        }
        elsif ($line =~ /:/) {
            $options{usemono} = 1;
        }
        elsif ($line =~ /:/) {
            $options{backoff} = 1;
        }
        elsif ($line =~ /:/) {
            # NOTE(review): the measure name is taken from the client as is.
            $options{measure} = "WordNet::Similarity::" . $tokens[1];
        }
        elsif ($line =~ /:/) {
            $options{contextScore} = $tokens[1];
        }
        elsif ($line =~ /:/) {
            $options{stoplist} = "$usr_dir/" . $tokens[1];
        }
        elsif ($line =~ //) {
            # Start of the stoplist: append the following lines to it.
            $stoplistflag = 1;
            open(SFH, '>>', $options{stoplist})
                or die "Cannot open $options{stoplist} : $!";
        }
        elsif ($line =~ //) {
            # End of the stoplist.
            $stoplistflag = 0;
            close SFH;
        }
        elsif ($stoplistflag == 1 && $line =~ // && defined $tokens[1]) {
            # One stoplist entry.
            print SFH $tokens[1];
            print SFH "\n";
        }
        elsif ($line eq "\0012") {
            # End-of-request marker. Perl reads "\0012" as the octal escape
            # \001 followed by the character '2'.
            last LINE;
        }
    }

    # A version request has already been answered and closed above.
    next CLIENT if $showversion;

    # The measure may not have been sent; compare against '' in that case
    # instead of against undef.
    my $measure = defined $options{measure} ? $options{measure} : '';
    $options{config} = $leskconfigfile
        if $measure eq "WordNet::Similarity::lesk";
    $options{config} = $vectorconfigfile
        if $measure eq "WordNet::Similarity::vector";

    print LFH "\nThe options are: \n";
    foreach my $key (keys %options) {
        print LFH "$key=>" . $options{$key} . "\n";
    }

    my $obj = WordNet::SenseRelate::AllWords->new(%options);
    unless ($obj) {
        # Without an object there is nothing to disambiguate with. Drop
        # this client instead of dying on a method call on undef, which
        # would take the whole server down.
        print LFH "\nCouldn't construct WordNet::SenseRelate::AllWords object";
        close($client);
        next CLIENT;
    }
    print LFH "\nWordNet::SenseRelate::AllWords object successfully created";

    open RFH, '>', $resultfilename
        or print "Cannot open $resultfilename for writing: $!";

    # The trace file is opened once per request, not once per sentence, so
    # that it holds the trace of every sentence rather than only the last.
    if ($options{trace}) {
        open TFH, '>', $tracefilename
            or print "Cannot open $tracefilename for writing: $!";
    }

    foreach my $sentence (@sentences) {
        chomp($sentence);
        my @context = split(/ +/, $sentence);

        #.....................................................................
        #
        # This is the call to disambiguate the sentence which client has sent
        #
        #.....................................................................
        my @res = $obj->disambiguate(
            window  => $windowSize,
            scheme  => $scheme,
            tagged  => $istagged,
            context => [@context],
        );

        #........................................................................
        #
        # AllWords.pm returns words with suffixes attached to it.
        # If #o is attached, the word is a stopword
        # If #ND is attached the word is not defined in WordNet
        # If #NR is attached no relatedness found with the surrounding words
        # If #IT is attached, the word has invalid tag
        # Otherwise, the chosen sense along with the part of speech is sent to
        # the client
        #
        #........................................................................
        print RFH join(' ', @context), "\n";
        print RFH join(' ', @res),     "\n";
        print LFH join(' ', @context), "\n";
        print LFH join(' ', @res),     "\n";
        print $client join(' ', @context), "\015\012";
        print $client join(' ', @res),     "\015\012";

        # $i walks the results and $j the input words. They drift apart
        # when a result is a compound: it stands for several input words.
        for (my ($i, $j) = (0, 0); $i <= $#res; $i++, $j++) {
            my $tag = substr $res[$i], index($res[$i], "#");
            my $val;

            if ($format eq 'raw') {
                if ($res[$i] =~ /_/ && $context[$j] !~ /_/) {
                    # Compound formed by the disambiguator: report it as
                    # returned and skip the input words it absorbed, one
                    # per underscore.
                    $val = $res[$i];
                    $j += ($res[$i] =~ tr/_//);
                }
                else {
                    $val = $context[$j] . $tag;
                }
            }
            elsif ($format eq 'tagged') {
                # Input token is word/tag; keep the word only.
                my ($word) = ($context[$j] =~ /(\S+)\/(\S+)/);
                $val = $word . $tag;
            }
            elsif ($format eq 'wntagged') {
                # Input token is word#tag; keep the word only.
                my ($word) = split /\#/, $context[$j];
                $val = $word . $tag;
            }

            # Explain the outcome: a fixed text for a known suffix,
            # otherwise the gloss of the chosen sense.
            my $explanation;
            foreach my $entry (@suffix_messages) {
                my ($pattern, $message) = @$entry;
                if ($val =~ $pattern) {
                    $explanation = $message;
                    last;
                }
            }
            unless (defined $explanation) {
                my ($gloss) = $qd->querySense($res[$i], "glos");
                $explanation = " : $gloss";
            }

            reportWord($client, $val . $explanation);
        }

        if ($options{trace}) {
            print TFH join(' ', @res), "\n";
            print $client "\015\012";
            print $client join(' ', @res), "\015\012";
            my $tstr = $obj->getTrace();
            print TFH "$tstr \n";
            print $client "$tstr \015\012";
            print $client "\015\012";
            print LFH "$tstr \n";
        }
    }

    close TFH if $options{trace};
    close RFH;
    close($client);
}

# Write one per-word result line to the log, the results file and the client.
#   $client  - connected client socket
#   $message - text of the line, without line terminators
# The log and the results file get "\n" line ends, the client gets CR LF.
sub reportWord {
    my ($client, $message) = @_;
    print LFH "\n$message\n";
    print RFH "\n$message\n";
    print $client "\n$message\015\012";
    return;
}

# Print the usage summary to STDOUT.
#   $long - when true, also print the description of every option
sub showUsage {
    my $long = shift;
    print "Usage: allwords_server.pl --logfile FILE \n";
    print " [--wnlocation WordNet path] [--port PORT] \n";
    print " | {--help | --version}\n";
    if ($long) {
        print "Options:\n";
        print "\t--logfile FILE logfile path for allwords.pl log\n";
        print "\t--wnlocation WordNet path WordNet path\n";
        print "\t--port PORTNUMBER Specify the port PORTNUMBER for the server to listen on \n";
        print "\t--help show this help message\n";
        print "\t--version show version information\n";
    }
    return;
}

=head1 NAME

allwords_server.pl - [Web] The server for allwords.cgi and version.cgi

=head1 DESCRIPTION

This script implements the backend of the web interface for
WordNet::SenseRelate::AllWords

This script listens to a port waiting for a request from allwords.cgi or
version.cgi. If disambiguation request is made by allwords.cgi, the server
first gets input options from allwords.cgi. Then it creates AllWords object.
Using AllWords object and input options disambiguate method is called. The
result returned by disambiguate is checked and appropriate message is sent
back to allwords.cgi client.

Client-Server Communication

The server loads all the required modules and listens to the port 32323.
The client sends information with a preamble to let the server know what
kind of input data it is going to get. For example, the client reads the
text to be disambiguated from the user and sends the context file to the
server as below

    context-line 1
    context-line 2
    context-line 3
    .
    .
    .

The tags that mark the start and the end of the context are not going to
conflict with the text to be disambiguated, because we clean the text before
disambiguation and hence the characters '<' and '>' will be removed from the
text.

If the version information is requested, appropriate version information of
the respective components is fetched and is passed to the version.cgi client.

If the client requests a trace level, then the trace output is fetched by
calling the getTrace() method of AllWords.pm.

Along with sending all information to the client, the server also stores all
the input data and result files on the server machine in a unique directory
for each client.

=head1 AUTHORS

 Varada Kolhatkar, University of Minnesota, Duluth
 kolha002 at d.umn.edu

 Ted Pedersen, University of Minnesota, Duluth
 tpederse at d.umn.edu

This document last modified by :
$Id: allwords_server.pl,v 1.39 2009/05/27 19:56:57 kvarada Exp $

=head1 SEE ALSO

allwords.cgi, version.cgi, README.web.pod

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2008, Varada Kolhatkar, Ted Pedersen, Jason Michelizzi

Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.2
or any later version published by the Free Software Foundation;
with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
Texts.

Note: a copy of the GNU Free Documentation License is available on
the web at L<http://www.gnu.org/copyleft/fdl.html> and is included in
this distribution as FDL.txt.

=cut