#!/usr/bin/perl -wT use strict; # where do we connect to the Similarity server? # note I put in my local host information just to give you an idea. # you should add your own though if you are using another server # you need to change the $remote_host and the $doc_base my $remote_host = 'localhost'; my $remote_port = '31135'; my $doc_base = '/umls_similarity/'; use CGI; use Socket; BEGIN { # Our University's webserver uses an ancient version of CGI::Carp # so we can't do fatalsToBrowser. # The carpout() function lets us modify the format of messages sent to # a filehandle (in this case STDERR) to include timestamps use CGI::Carp 'carpout'; carpout(*STDOUT); } # subroutine prototypes sub showForm ($$$$$$$$$); sub round ($); my $cgi = CGI->new; # These are the colors of the text when we alternate text colors (when # showing errors, for example). my $text_color1 = 'black'; my $text_color2 = '#d03000'; # print the HTTP header print $cgi->header; # if the showform parameter is no, then don't show the form--this is how # we avoid showing the form in popups my $showform = $cgi->param ('showform') || 'yes'; # show the start of the page (all the usual HTML that goes at the top # of a page, etc.) showPageStart (); # check if we want to show the version information (version of UMLS, etc.) my $showversion = $cgi->param ('version'); if ($showversion) { socket (Server, PF_INET, SOCK_STREAM, getprotobyname ('tcp')); my $internet_addr = inet_aton ($remote_host) or die "Could not convert $remote_host to an Internet addr: $!\n"; my $paddr = sockaddr_in ($remote_port, $internet_addr); unless (connect (Server, $paddr)) { print "

Cannot connect to server $remote_host:$remote_port

\n"; goto SHOW_END; } select ((select (Server), $|=1)[0]); print Server "v\015\012\015\012"; print "

Version information

\n"; while (my $line = ) { last if $line eq "\015\012"; if ($line =~ /^v (\S+) (\S+)/) { print "

$1 version $2

\n"; } elsif ($line =~ m/^! (.*)/) { print "

$1

\n"; } else { print "

Strange message from server: $line\n"; } } local $ENV{PATH} = "/usr/local/bin:/usr/bin:/bin:/sbin"; my $t_osinfo = `uname -a` || "Couldn't get system information: $!"; # $t_osinfo is tainted. Use it in a pattern match and $1 will # be untainted. $t_osinfo =~ /(.*)/; print "

HTTP server: $ENV{HTTP_HOST} ($1)

\n"; print "

Similarity server: $remote_host

\n"; goto SHOW_END; } # check if we're generating this page as the result of a query; if so, then # we need to show the results. my $word1 = $cgi->param ('word1'); my $word2 = $cgi->param ('word2'); if ($word1 and !$word2) { print "

Term 2 was not specified.

"; } elsif (!$word1 and $word2) { print "

Term 1 was not specified.

"; } elsif ($word1 and $word2) { print "
\n"; socket (Server, PF_INET, SOCK_STREAM, getprotobyname ('tcp')); my $internet_addr = inet_aton ($remote_host) or die "Could not convert $remote_host to an Internet addr: $!\n"; my $paddr = sockaddr_in ($remote_port, $internet_addr); unless (connect (Server, $paddr)) { print "

Cannot connect to server $remote_host:$remote_port

\n"; goto SHOW_END; } select ((select (Server), $|=1)[0]); # value of the parameters can be 'all', 'gloss', or 'synset' my $w1option = $cgi->param ('senses1'); my $w2option = $cgi->param ('senses2'); my $button = $cgi->param('button'); my %measurehash = (); $measurehash{'path'} = "Path Length"; $measurehash{'lch'} = "Leacock & Chodorow"; $measurehash{'wup'} = "Wu & Palmer"; $measurehash{'res'} = "Resnik"; $measurehash{'lin'} = "Lin"; $measurehash{'jcn'} = "Jiang & Conrath"; $measurehash{'cdist'} = "Conceptual Distance"; $measurehash{'nam'} = "Nguyen & Al-Mubaid"; $measurehash{'random'} = "Random Measure"; $measurehash{'vector'} = "Vector Measure"; $measurehash{'lesk'} = "Adapted Lesk"; my $query_type = 2; my $measure = ""; my $sab = ""; my $rel = ""; if($button eq "Compute Relatedness") { $measure = $cgi->param('relatedness'); $sab = $cgi->param('sabdef'); $rel = $cgi->param('reldef'); } else { $measure = $cgi->param ('similarity'); $sab = $cgi->param ('sab'); $rel = $cgi->param ('rel'); } my $gloss = $cgi->param ('gloss') ? 'yes' : 'no'; my $path = $cgi->param ('path') ? 'yes' : 'no'; my $all_senses = $cgi->param ('sense') ? 1 : 0; # terminate all messages with CRLF (best to avoid \r\n because the # meaning of \r and \n varies from platform to platform # if the word is a CUI get its preferred term if($word1=~/C[0-9]+/) { print Server "t|$word1|\015\012"; } if($word2=~/C[0-9]+/) { print Server "t|$word2|\015\012"; } # now get their similarity if ($measure eq 'all' && $button eq "Compute Similarity") { foreach my $m (qw/path wup lch res lin jcn random cdist nam/) { print Server +("r|$word1|$word2|$button|$m|$sab|$rel|", "\015\012"); } print Server "\015\012"; } elsif ($measure eq 'all' && $button eq "Compute Relatedness") { foreach my $m (qw/vector lesk/) { print Server +("r|$word1|$word2|$button|$m|$sab|$rel|", "\015\012"); } } else { print Server ("r|$word1|$word2|$button|$measure|$sab|$rel|", "\015\012"); } # get the path information if the similarity button is clicked if($button eq "Compute Similarity") { print Server "p|$word1|$word2|\015\012"; } # get the definitions of the possible CUIs of the words or the CUIs # themselves depending on what was entered print Server "g|$button|$word1|\015\012"; print Server "g|$button|$word2|\015\012"; print Server "\015\012"; my %terms = (); my %cuis = (); my %scores = (); my %paths = (); my @glosses = (); my @errors = (); my $pathflag = 0; my @version_info; my $lines = 0; my $last_measure = ''; while (my $response = ) { last if $response eq "\015\012"; $lines++; my $beginning = substr $response, 0, 1; my $end = substr $response, 2; if ($beginning eq '!') { $end =~ s/\s+$//; push @errors, $end; } elsif ($beginning eq 'r') { my ($measure, $wps1, $wps2, $score) = split /\s+/, $end; $score = round ($score); $last_measure = $measure; push @{$scores{$measure}}, [$score, $wps1, $wps2]; $cuis{$wps1} = $word1; $cuis{$wps2} = $word2; } elsif($beginning eq 't') { my ($cui, $word) = split/\s+/, $end; $word=~s/_/ /g; $terms{$cui} = $word; } elsif ($beginning eq 'g') { my ($wps, @gloss_words) = split /\s+/, $end; push @glosses, [$wps, substr ($end, length ($wps))]; } elsif($beginning eq 'p') { my @array = split/\|/, $end; my $i = 0; while($i <= $#array) { my $c1 = $array[$i]; $i++; my $c2 = $array[$i]; $i++; my $p = $array[$i]; $i++; if($c1=~/^\s*$/) { next; } if($c2=~/^\s*$/) { next; } push @{$paths{"$c1|$c2"}}, $p; $pathflag = 1; } } elsif ($beginning eq 'v') { my ($package, $version) = split /\s+/, $end; push @version_info, [$package, $version]; } else { push @errors, "Error: received strange message from server `$response'"; } } my $query_string = $ENV{QUERY_STRING} || ""; # replace literal ampersands with their XML entity equivalents $query_string =~ s/&/&/g; if (scalar @version_info) { foreach my $item (@version_info) { print "

$item->[0] version $item->[1]

\n"; } goto SHOW_END; } # show errors, if any if (scalar @errors) { unless ($cgi->param ('errors') eq 'show') { my $query = $query_string . '&errors=show'; my $url = "umls_similarity.cgi?${query}"; # Having onclick return false should keep the browser from # loading the page specified by href, but IE loads it # anyways. That's why we set href to # instead of the # URL (setting it to the URL would let non-JavaScript # browsers see the page in the main window, but such # browsers are rare) print +("

", "View errors", '

', "\n"); } else { print '

Warnings/Errors:

'; print '

'; my $parity = 0; foreach (0..$#errors) { my $color = $parity ? $text_color1 : $text_color2; print "

$errors[$_]
"; $parity = !$parity; } print "

\n"; goto SHOW_END; } } # show glosses, if any if ($gloss eq 'yes') { my $parity = 0; if (scalar @glosses) { print '

Definitions:

'; print '

'; print "

"; foreach my $ref (@glosses) { my $cui = $ref->[0]; my $word = $cuis{$cui}; my @defs = split/\|/, $ref->[1]; if($word=~/C[0-9]/) { $word = $terms{$word}; } print "
$word ($cui)
"; foreach my $def (@defs) { print "
$def
"; } } print "
\n"; } else { print "

Sorry, no definitions were found.

\n"; } goto SHOW_END; } else { my $query = $query_string . '&gloss=yes'; my $url = "umls_similarity.cgi?${query}"; print +('

', "", "View Definitions", '

', "\n"); } # show path information if similarity is desired, if any if($button eq "Compute Similarity") { if ($path eq 'yes') { my $parity = 0; if($pathflag > 0) { print '

Shortest Path Information

'; print '

'; print "

"; foreach my $item (sort keys %paths) { if($item=~/^\s*$/) { next; } my ($c1, $c2) = split/\|/, $item; print "The shortest path between $c1 and $c2 is:
"; foreach my $p (@{$paths{$item}}) { print "
$p

"; } } print "
\n"; } else { print "

Sorry, no path information was found.

\n"; } goto SHOW_END; } else { my $query = $query_string . '&path=yes'; my $url = "umls_similarity.cgi?${query}"; print +('

', "", "View Shortest Path", '

', "\n"); } } if ($all_senses) { print '

Results:

' if scalar keys %scores; print ''; print ''; print "\n"; foreach my $m (keys %scores) { my @scrs = sort {$b->[0] <=> $a->[0]} @{$scores{$m}}; foreach (@scrs) { my $wps1 = $_->[1]; $wps1 =~ s/\#/%23/g; my $wps2 = $_->[2]; $wps2 =~ s/\#/%23/g; print ""; print ""; print ""; print ""; print "\n"; } } print "
MeasureTerm 1Term 2Score
$m$_->[1]$_->[2]$_->[0]
\n"; } else { my $query = $query_string; # remove from the query string options that we don't want $query =~ s/(?:&)sense=yes//; $query =~ s/(?:&)?trace=yes//; # now add the option we do want $query .= '&sense=yes'; # prepare two query strings--one without traces and one with my $url_nt = "umls_similarity.cgi?${query}"; # 'nt' means 'no trace' my $url_trace = $url_nt . '&trace=yes'; goto SHOW_END unless scalar keys %scores; print '

Results:

'; foreach my $m (keys %scores) { my $good = $scores{$m}->[0]; foreach my $i (1..$#{$scores{$m}}) { if ($scores{$m}->[$i]->[0] > $good->[0]) { $good = $scores{$m}->[$i]; } } my $wps1 = $good->[1]; $wps1 =~ s/\#/%23/g; my $wps2 = $good->[2]; $wps2 =~ s/\#/%23/g; my $term1 = $word1; my $term2 = $word2; if($word1=~/C[0-9]+/) { $term1 = $terms{$word1}; } if($word2=~/C[0-9]+/) { $term2 = $terms{$word2}; } if($m=~/lesk|vector/) { print +("\n

", "The relatedness of $term1 (", "$good->[1] ", ") and $term2 ($good->[2] ", ") using $measurehash{$m} ($m) is $good->[0].", "

\nUsing:", "

    SABDEF :: include $sab

", "

    RELDEF :: include $rel

"); } else { print +("\n

", "The similarity of $term1 (", "$good->[1] ", ") and $term2 ($good->[2] ", ") using $measurehash{$m} ($m) is $good->[0].", "

\nUsing:", "

    SAB :: include $sab

", "

    REL :: include $rel

"); } } print +("

", "View relatedness of all possible senses

\n"); } SHOW_END: print "
"; close Server; } $word1 = defined $word1 ? $word1 : ""; $word2 = defined $word2 ? $word2 : ""; my $measure = 'path'; my $sab = 'MSH'; my $rel = 'PAR/CHD'; my $sabdef = 'UMLS_ALL'; my $reldef = 'CUI/PAR/CHD/RB/RN'; my $relatedness = 'vector'; showForm (2, $word1, $word2, $measure, $sab, $rel, $sabdef, $reldef, $relatedness) unless $showform eq 'no'; showPageEnd (); exit; # ========= subroutines ========= sub round ($) { my $num = shift; my $str = sprintf ("%.4f", $num); $str =~ s/\.?0+$//; return $str; } sub showPageStart { print <<"EOINTRO"; Similarity

UMLS::Similarity Web Interface

UMLS::Similarity is a freely available open source software package that can be used to obtain the similarity or relatedness between two biomedical terms from the Unified Medical Language System (UMLS).

EOINTRO } sub showForm ($$$$$$$$$) { my ($type, $arg1, $arg2, $arg3, $arg4, $arg5, $arg6, $arg7, $arg8) = @_; # the 'action' attribute for the HTML form below--should be the script # name my $action = 'umls_similarity.cgi'; print <<"EOFORM1";

DIRECTIONS: You may enter any two terms or Concept Unique Identifiers (CUIs) below. If terms are entered, then the relatedness or similarity of the possible CUIs will be computed and the pair with the highest score returned. The difference between similarity and relatedness is ....

Detailed instructions.
About the Similarity Measures.
About the Relatedness Measures.

EOFORM1 # check if we are trying to get the user to type in a pair of words or # if the user needs to select senses from a option menu. if ($type == 2) { # the user needs to type in two words print <<"EOT";




EOT } else { # the user needs to select word senses from a menu print "\n"; print "
\n"; print "\n"; print "

\n"; } print '
'; print 'Semantic Similarity'; print '', "\n"; print '
\n"; print '', "\n"; print '

\n"; print '', "\n"; print '

\n"; print <<"EOFORM";


EOFORM print '
'; print 'Semantic Relatedness'; print '', "\n"; print '
\n"; print '', "\n"; print '

\n"; print '', "\n"; print '

\n"; print <<"EOFORM2";


Show version info


EOFORM2 } sub showPageEnd { print <<'ENDOFPAGE'; ENDOFPAGE } __END__ =head1 NAME umls_similarity.cgi - a CGI script implementing a portion of a web interface for UMLS::Similarity =head1 DESCRIPTION This script works in conjunction with umls_similarity_server.pl and wps.cgi to provide a web interface for UMLS::Similarity. The documentation for umls_similarity_server.pl describes how messages are passed between this script and that one. =head1 AUTHORS Ted Pedersen, University of Minnesota Duluth tpederse at d.umn.edu Jason Michelizzi =head1 BUGS None known. =head1 COPYRIGHT Copyright (c) 2005-2008, Ted Pedersen and Jason Michelizzi This program is free software; you may redistribute and/or modify it under the terms of the GNU General Public License version 2 or, at your option, any later version. =cut