#!/usr/local/bin/perl -wT use strict; # where do we connect to the Similarity server? Here: my $remote_host = '127.0.0.1'; my $remote_port = '31134'; my $doc_base = '../docs'; use CGI; use Socket; BEGIN { # Our University's webserver uses an ancient version of CGI::Carp # so we can't do fatalsToBrowser. # The carpout() function lets us modify the format of messages sent to # a filehandle (in this case STDERR) to include timestamps use CGI::Carp 'carpout'; carpout(*STDOUT); } # subroutine prototypes sub showForm ($$$$); sub round ($); my $cgi = CGI->new; # These are the colors of the text when we alternate text colors (when # showing errors, for example). my $text_color1 = 'black'; my $text_color2 = '#d03000'; # print the HTTP header print $cgi->header; # if the showform parameter is no, then don't show the form--this is how # we avoid showing the form in popups my $showform = $cgi->param ('showform') || 'yes'; # show the start of the page (all the usual HTML that goes at the top # of a page, etc.) showPageStart (); # check if we want to show the version information (version of WordNet, etc.) my $showversion = $cgi->param ('version'); if ($showversion) { socket (Server, PF_INET, SOCK_STREAM, getprotobyname ('tcp')); my $internet_addr = inet_aton ($remote_host) or die "Could not convert $remote_host to an Internet addr: $!\n"; my $paddr = sockaddr_in ($remote_port, $internet_addr); unless (connect (Server, $paddr)) { print "

Cannot connect to server $remote_host:$remote_port

\n"; goto SHOW_END; } select ((select (Server), $|=1)[0]); print Server "v\015\012\015\012"; print "

Version information

\n"; while (my $line = ) { last if $line eq "\015\012"; if ($line =~ /^v (\S+) (\S+)/) { print "

$1 version $2

\n"; } elsif ($line =~ m/^! (.*)/) { print "

$1

\n"; } else { print "

Strange message from server: $line\n"; } } local $ENV{PATH} = "/usr/local/bin:/usr/bin:/bin:/sbin"; my $t_osinfo = `uname -a` || "Couldn't get system information: $!"; # $t_osinfo is tainted. Use it in a pattern match and $1 will # be untainted. $t_osinfo =~ /(.*)/; print "

HTTP server: $ENV{HTTP_HOST} ($1)

\n"; print "

Similarity server: $remote_host

\n"; goto SHOW_END; } # check if we're generating this page as the result of a query; if so, then # we need to show the results. my $word1 = $cgi->param ('word1'); my $word2 = $cgi->param ('word2'); if ($word1 and !$word2) { print "

Word 2 was not specified.

"; } elsif (!$word1 and $word2) { print "

Word 1 was not specified.

"; } elsif ($word1 and $word2) { print "
\n"; socket (Server, PF_INET, SOCK_STREAM, getprotobyname ('tcp')); my $internet_addr = inet_aton ($remote_host) or die "Could not convert $remote_host to an Internet addr: $!\n"; my $paddr = sockaddr_in ($remote_port, $internet_addr); unless (connect (Server, $paddr)) { print "

Cannot connect to server $remote_host:$remote_port

\n"; goto SHOW_END; } select ((select (Server), $|=1)[0]); # value of the parameters can be 'all', 'gloss', or 'synset' my $w1option = $cgi->param ('senses1'); my $w2option = $cgi->param ('senses2'); my $query_type = 2; if ($w1option eq 'gloss' or $w1option eq 'synset') { $query_type = 1 unless $word1 =~ m/[^#]+\#[nvars]\#\d+/; } if ($w2option eq 'gloss' or $w2option eq 'synset') { $query_type = 1 unless $word2 =~ m/[^#]+\#[nvars]\#\d+/; } if ($query_type == 1) { my @senses1; my @senses2; if ($w1option eq 'gloss') { print Server "g $word1\015\012"; } elsif ($w1option eq 'synset') { print Server "s $word1\015\012"; } elsif ($w1option eq 'all') { @senses1 = ([$word1, ""]); } else { print "
Internal error: invalid option `$w1option'
\n"; } if ($w2option eq 'gloss') { print Server "g $word2\015\012"; } elsif ($w2option eq 'synset') { print Server "s $word2\015\012"; } elsif ($w2option eq 'all') { @senses2 = ([$word2, ""]); } else { print "
Internal error: invalid option `$w2option'
\n"; } print Server "\015\012"; while (my $response = ) { last if $response eq "\015\012"; my $prefix = substr $response, 0, 1; my $end = substr $response, 2; if ($prefix eq 'g') { my ($wps, $gloss) = m/([^#]+\#[nvar]\#\d+) (.*)/; print "

$wps: $gloss

"; } elsif ($prefix eq '1') { my ($wps, $gloss) = $end =~ m/([^#]+\#[nvar]\#\d+) (.*)/; push @senses1, [$wps, $gloss]; } elsif ($prefix eq '2') { my ($wps, $gloss) = $end =~ m/([^#]+\#[nvar]\#\d+) (.*)/; push @senses2, [$wps, $gloss]; } else { print "Strange message from server `$response'"; } } my $measure = $cgi->param ('measure') || 'path'; showForm (1, \@senses1, \@senses2, $measure); showPageEnd (); exit; } else { my $measure = $cgi->param ('measure'); my $trace = $cgi->param ('trace') ? 'yes' : 'no'; my $gloss = $cgi->param ('gloss') ? 'yes' : 'no'; my $root = $cgi->param ('rootnode') ? 'yes' : 'no'; my $syns = $cgi->param ('synset') ? 'yes' : 'no'; my $all_senses = $cgi->param ('sense') ? 1 : 0; $word1 =~ tr/A-Z /a-z_/; $word2 =~ tr/A-Z /a-z_/; # terminate all messages with CRLF (best to avoid \r\n because the # meaning of \r and \n varies from platform to platform if ($measure eq 'all') { foreach my $m (qw/hso lch lesk lin jcn path res vector vector_pairs wup/) { print Server +("r $word1 $word2 $m $trace $gloss $syns $root", "\015\012"); } print Server "\015\012"; } else { print Server ("r $word1 $word2 $measure $trace $gloss $syns $root", "\015\012\015\012"); } my @glosses; my %scores; my @errors; my @synsets; my @version_info; my $lines = 0; my $last_measure = ''; while (my $response = ) { last if $response eq "\015\012"; $lines++; my $beginning = substr $response, 0, 1; my $end = substr $response, 2; if ($beginning eq '!') { $end =~ s/\s+$//; push @errors, $end; } elsif ($beginning eq 'r') { my ($measure, $wps1, $wps2, $score) = split /\s+/, $end; $score = round ($score); $last_measure = $measure; push @{$scores{$measure}}, [$score, $wps1, $wps2]; } elsif ($beginning eq 't') { $end =~ s||
|g; ### FIXME -- we lost the traces push @{$scores{$last_measure}->[-1]}, "$end\n"; } elsif ($beginning eq 'g') { my ($wps, @gloss_words) = split /\s+/, $end; push @glosses, [$wps, substr ($end, length ($wps))]; } elsif ($beginning eq 's') { my (@synset_words) = split /\s+/, $end; push @synsets, [@synset_words]; } elsif ($beginning eq 'v') { my ($package, $version) = split /\s+/, $end; push @version_info, [$package, $version]; } else { push @errors, "Error: received strange message from server `$response'"; } } my $query_string = $ENV{QUERY_STRING} || ""; # replace literal ampersands with their XML entity equivalents $query_string =~ s/&/&/g; if (scalar @version_info) { foreach my $item (@version_info) { print "

$item->[0] version $item->[1]

\n"; } goto SHOW_END; } # show errors, if any if (scalar @errors) { unless ($cgi->param ('errors') eq 'show') { my $query = $query_string . '&errors=show'; my $url = "similarity.cgi?${query}"; # Having onclick return false should keep the browser from # loading the page specified by href, but IE loads it # anyways. That's why we set href to # instead of the # URL (setting it to the URL would let non-JavaScript # browsers see the page in the main window, but such # browsers are rare) print +("

", "View errors", '

', "\n"); } else { print '

Warnings/Errors:

'; print '

'; my $parity = 0; foreach (0..$#errors) { my $color = $parity ? $text_color1 : $text_color2; print "

$errors[$_]
"; $parity = !$parity; } print "

\n"; goto SHOW_END; } } # show glosses, if any if ($gloss eq 'yes') { my $parity = 0; if (scalar @glosses) { print '

Glosses:

'; print '

'; print "

"; foreach my $ref (@glosses) { print "
$ref->[0]
$ref->[1]
"; } print "
\n"; } else { print "

Sorry, no glosses were found.

\n"; } goto SHOW_END; } else { my $query = $query_string . '&gloss=yes'; my $url = "similarity.cgi?${query}"; print +('

', "", "View glosses (definitions)", '

', "\n"); } if ($syns eq 'yes') { # show complete synsets, if any were requested if (scalar @synsets) { print '

Synsets:

'; print '

'; my $parity = 1; foreach (0..$#synsets) { my $color = $parity ? $text_color1 : $text_color2; print "

{"; print join (', ', @{$synsets[$_]}); print "}
"; $parity = !$parity; } print "

\n"; } else { print "

Sorry, no synsets were found.

\n"; } goto SHOW_END; } else { my $query = $query_string . '&synset=yes'; my $url = "similarity.cgi?${query}"; print +('

', "View synsets", '

', "\n"); } if ($all_senses) { print '

Results:

' if scalar keys %scores; print ''; print ''; print '' if $trace eq 'yes'; print "\n"; foreach my $m (keys %scores) { my @scrs = sort {$b->[0] <=> $a->[0]} @{$scores{$m}}; foreach (@scrs) { my $wps1 = $_->[1]; $wps1 =~ s/\#/%23/g; my $wps2 = $_->[2]; $wps2 =~ s/\#/%23/g; print ""; print ""; print ""; print ""; if ($trace eq 'yes') { print ""; } print "\n"; } } print "
MeasureWord 1Word 2ScoreTrace
$m$_->[1]$_->[2]$_->[0]$_->[3]
\n"; } else { my $query = $query_string; # remove from the query string options that we don't want $query =~ s/(?:&)sense=yes//; $query =~ s/(?:&)?trace=yes//; # now add the option we do want $query .= '&sense=yes'; # prepare two query strings--one without traces and one with my $url_nt = "similarity.cgi?${query}"; # 'nt' means 'no trace' my $url_trace = $url_nt . '&trace=yes'; goto SHOW_END unless scalar keys %scores; print '

Results:

'; foreach my $m (keys %scores) { my $good = $scores{$m}->[0]; foreach my $i (1..$#{$scores{$m}}) { if ($scores{$m}->[$i]->[0] > $good->[0]) { $good = $scores{$m}->[$i]; } } my $wps1 = $good->[1]; $wps1 =~ s/\#/%23/g; my $wps2 = $good->[2]; $wps2 =~ s/\#/%23/g; print +("\n

", "The relatedness of ", "$good->[1] ", "and $good->[2] ", "using $m is $good->[0].", "

\n"); if ($trace eq 'yes') { print "

$good->[3]

"; } } print +("

", "View relatedness of all senses (without traces)

\n"); print +("

", "View relatedness of all senses (with traces)

\n"); } unless ($trace eq 'yes') { my $urltrace = "similarity.cgi?${query_string}&trace=yes"; print +("

", "View traces

\n"); } SHOW_END: print "
"; close Server; } } $word1 = defined $word1 ? $word1 : ""; $word2 = defined $word2 ? $word2 : ""; my $measure = 'path';#defined $measure ? $measure : 'path'; showForm (2, $word1, $word2, $measure) unless $showform eq 'no'; showPageEnd (); exit; # ========= subroutines ========= sub round ($) { my $num = shift; my $str = sprintf ("%.4f", $num); $str =~ s/\.?0+$//; return $str; } sub showPageStart { print <<"EOINTRO"; Similarity

WordNet::Similarity web interface

Read an overview of WordNet::Similarity.

EOINTRO } sub showForm ($$$$) { my ($type, $arg1, $arg2, $arg3) = @_; # the 'action' attribute for the HTML form below--should be the script # name my $action = 'similarity.cgi'; print <<"EOFORM1";

You may enter any two words in one of three formats:

  1. word
  2. word#part_of_speech (where part_of_speech is one of n, v, a, or r)
  3. word#part_of_speech#sense (where sense is a positive integer)

If words are entered in format 1 or 2, then the relatedness of all valid forms of the words will be computed (e.g., if 'dogs' is entered, then 'dog' will be used to compute relatedness). More instructions.

EOFORM1 # check if we are trying to get the user to type in a pair of words or # if the user needs to select senses from a option menu. if ($type == 2) { # the user needs to type in two words print <<"EOT";

EOT } else { # the user needs to select word senses from a menu print "\n"; print "
\n"; print "\n"; print "
\n"; } print '', "\n"; print '\n"; print <<"EOFORM"; About the measures

Show version info


EOFORM } sub showPageEnd { print <<'ENDOFPAGE'; ENDOFPAGE } __END__ =head1 NAME similarity.cgi - a CGI script implementing a portion of a web interface for WordNet::Similarity =head1 DESCRIPTION This script works in conjunction with similarity_server.pl and wps.cgi to provide a web interface for WordNet::Similarity. The documentation for similarity_server.pl describes how messages are passed between this script and that one. =head1 AUTHORS Ted Pedersen, University of Minnesota Duluth tpederse at d.umn.edu Jason Michelizzi, University of Minnesota Duluth mich0212 at d.umn.edu =head1 BUGS None known. =head1 COPYRIGHT Copyright (c) 2005, Ted Pedersen and Jason Michelizzi This program is free software; you may redistribute and/or modify it under the terms of the GNU General Public License version 2 or, at your option, any later version. =cut