#!/usr/local/bin/perl -w

=head1 WebService::Google-Hack Web Interface

=head1 SYNOPSIS

The WebService::Google-Hack web interface provides an easy to use interface
for some of the features of WebService::Google-Hack.

=head1 DESCRIPTION

To install the interface please follow these steps:

1) Create a directory named ghack in your cgi-bin directory (where all your
cgi files reside). So it should be something like:

    /webspace/cgi-bin/ghack

2) Next, copy the file named google_hack.cgi, which is given with the
distribution of the google-hack package, into your cgi-bin/ghack/ directory.

3) Open the google_hack.cgi file, and change the lib path to the path where
WebService::GoogleHack has been installed on your machine.

    use lib "/home/lib/perl5/site_perl/";

*Note: The google_hack.cgi file is in the WebInterface directory of
GoogleHack. For eg: WebService/GoogleHack/WebInterface.

4) Now, in the google_hack.cgi file (which is also given in the WebInterface
directory of GoogleHack), set the remote_host and remote_port variables to
the correct values.

    $remote_host = '';
    $remote_port = '';

The remote host will be the IP address of the machine where the google_hack
server will be running. The remote port needs to be the same as the
$localport variable in ghack_server.pl

You should now be able to use the web interface.

=head1 AUTHOR

Ted Pedersen, E<lt>tpederse@d.umn.eduE<gt>

Pratheepan Raveendranathan, E<lt>rave0029@d.umn.eduE<gt>

Jason Michelizzi, E<lt>mich0212@d.umn.eduE<gt>

Date 11/08/2004

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2003 by Pratheepan Raveendranathan, Ted Pedersen, Jason Michelizzi

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to

    The Free Software Foundation, Inc.,
    59 Temple Place - Suite 330,
    Boston, MA  02111-1307, USA.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

use strict;

##########################################################
#                 Change to path                         #
##########################################################

use lib "";

##########################################################
#        Change to host ip address and port              #
##########################################################

my $remote_host = '';
my $remote_port = '';

use CGI;
use Socket;

# Send warnings and die messages to STDOUT so that they show up in the
# generated page instead of disappearing into the web server's error log.
BEGIN {
    use CGI::Carp 'carpout';
    carpout(*STDOUT);
}

# NOTE(review): the page templates below contain only plain text, no markup
# and no form fields. This looks like an artefact of how this copy of the
# file was produced -- confirm against the distributed google_hack.cgi.

my $cgi = CGI->new;

# print the HTTP header
print $cgi->header;

my $action = $cgi->param('action');
my $type   = $cgi->param('opt');

# The API key is read from the form but is not forwarded to the server by
# any of the requests built in this file.
my $key = $cgi->param('apikey');

# Request parameters shared with the generate* subs below, which read them
# as file-scoped variables rather than as arguments.
my $words;
my $frequency;
my $numPages;
my $numIterations;
my $wordS1;
my $wordS2;

# A request without an action is the first visit; a missing feature choice
# is treated as "no feature" rather than compared while undefined.
$action = "first" unless defined $action;
$type   = ""      unless defined $type;

if ($action eq "first") {
    showPageStart();
}
elsif ($action eq "Submit") {
    if ($type eq "wordcluster") {
        WordClusters();
    }
    elsif ($type eq "pmi") {
        PMI();
    }
    elsif ($type eq "review") {
        # No Review sub is defined in this file; only call it when one has
        # been provided, instead of failing with "Undefined subroutine".
        if (defined &Review) {
            Review();
        }
        else {
            print "\nThe review feature is not available in this interface.\n";
        }
    }
}
elsif ($action eq "Generate") {
    $words         = $cgi->param('words');
    $frequency     = $cgi->param('cutoff');
    $numPages      = $cgi->param('numres');
    $numIterations = $cgi->param('numiters');
    generateWordCluster();
}
elsif ($action eq "PMIMeasure") {
    $wordS1 = $cgi->param('searchString1');
    $wordS2 = $cgi->param('searchString2');
    generatePMI();
}

showPageEnd();

exit;

# Print the landing page that introduces the interface.
sub showPageStart {
    print <<"EOINTRO";
Google-Hack

p r o j e c t  

            g o o g l e    - h a c k  


Use GoogleHack





 

(Please enter your Google API license key here, if you dont have one you can get it @ http://www.google.com/apis.
Or to proceed with default google-hack developer\'s key, select the feature that you would like to use and click on submit.)



Developers

Ted Pedersen ,    Pratheepan Raveendranathan
EOINTRO
}

# Print the page describing the word-cluster parameters.
sub WordClusters {
    print <<"Word_Clusters";

p r o j e c t  

            g o o g l e    - h a c k  


Word Clusters

Parameters

Word_Clusters

    print " (This will be the number of web pages to parse, Defaults to 10, Maximum 50 )\n\n";

    print <<"Word_Clusters1";

Word_Clusters1

    print " (Words with frequency less than given would not be considered, Max 20)\n\n";

    print <<"Word_Clusters2";

Word_Clusters2

    print " (This will be the number of iterations)\n\n";

    print <<"Word_Clusters3";


(New Line/Space Delimited World List)

Word_Clusters3
}

# Open a TCP connection to $remote_host:$remote_port.
#
# Returns the connected, autoflushed socket handle on success. On any
# failure a message is printed to the page and an empty list / undef is
# returned, so the caller can stop instead of talking to a dead handle.
sub _connect_to_server {
    my $internet_addr = inet_aton($remote_host);
    unless (defined $internet_addr) {
        print "Could not convert $remote_host to an Internet addr: $!\n";
        return;
    }

    my $connect_failed = "\n\nCannot connect to server $remote_host:$remote_port\n\n\n";

    # sockaddr_in needs a numeric port; an unconfigured ('') or malformed
    # port can never be connected to.
    unless ($remote_port =~ /\A\d+\z/) {
        print $connect_failed;
        return;
    }

    my $proto = getprotobyname('tcp');
    my $server;
    unless (socket($server, PF_INET, SOCK_STREAM, $proto)) {
        print $connect_failed;
        return;
    }

    unless (connect($server, sockaddr_in($remote_port, $internet_addr))) {
        print $connect_failed;
        close $server;
        return;
    }

    # Unbuffer the socket so the request is sent as soon as it is printed.
    my $previous = select($server);
    $| = 1;
    select($previous);

    return $server;
}

# Relay every line of the server's reply to the page, stopping at the first
# line that consists solely of CRLF (or at end of stream).
#
# The reply is printed as received, without HTML escaping.
# NOTE(review): whether the server sends markup that must pass through
# unescaped cannot be determined from this file -- confirm before escaping.
sub _relay_server_reply {
    my ($server) = @_;

    while (my $line = <$server>) {
        last if $line eq "\015\012";
        print "\n$line";
    }
    return;
}

# Send a word-cluster request built from $words, $numPages, $frequency and
# $numIterations to the server and print the results page.
sub generateWordCluster {
    # Missing form fields arrive as undef; treat them as empty strings.
    for my $field ($words, $frequency, $numPages, $numIterations) {
        $field = '' unless defined $field;
    }

    # The word list is sent colon-separated. The request itself is framed
    # with tabs and CRLF, so whitespace is removed from the other fields to
    # keep user input from altering the framing.
    $words =~ s/\s+/:/g;
    for my $field ($frequency, $numPages, $numIterations) {
        $field =~ s/\s+//g;
    }

    my $server = _connect_to_server();
    return unless $server;

    print {$server} "c\t$words\t$numPages\t$frequency\t$numIterations\t\015\012\015\012";

    print <<"temp";

p r o j e c t  

            g o o g l e    - h a c k  


temp

    print "\nGoogle Hack Word Cluster Results for ";

    # Echo the request back, escaping it because it is user-supplied text
    # written into an HTML page.
    foreach my $word (split(/:/, $words)) {
        next if $word eq "";
        print "\n" . $cgi->escapeHTML($word);
    }

    my $shown_frequency  = $cgi->escapeHTML($frequency);
    my $shown_pages      = $cgi->escapeHTML($numPages);
    my $shown_iterations = $cgi->escapeHTML($numIterations);

    print "\n\nFrequency Cutoff: $shown_frequency\n"
        . "# of Web Pages: $shown_pages\n"
        . "# of Iterations: $shown_iterations\n";

    _relay_server_reply($server);

    print "\n";

    close $server;
    return;
}

# Print the page describing the PMI measure inputs.
sub PMI {
    print <<"PMI";

p r o j e c t  

            g o o g l e    - h a c k  


PMI Measure

(This feature allows you to find the Pointwise Mutual Information measure between two terms)

(Enter a term like dog)

(Enter a term like cat)

PMI
}

# Send a PMI request for $wordS1 and $wordS2 to the server and print the
# results page.
sub generatePMI {
    # Missing form fields arrive as undef; treat them as empty strings.
    for my $term ($wordS1, $wordS2) {
        $term = '' unless defined $term;
        $term =~ s/\s+//g;
    }

    my $server = _connect_to_server();
    return unless $server;

    print {$server} "p\t$wordS1\t$wordS2\015\012\015\012";

    print <<"temp";

p r o j e c t  

            g o o g l e    - h a c k  


temp

    print "\nGoogle Hack PMI Measure for ";

    # The terms are user-supplied text written into an HTML page.
    print "\n" . $cgi->escapeHTML($wordS1) . " AND " . $cgi->escapeHTML($wordS2);

    print "\nPMI Measure: ";

    _relay_server_reply($server);

    print "\n";

    close $server;
    return;
}

# Print the closing part of every page (currently empty).
sub showPageEnd {
    print <<'ENDOFPAGE';
ENDOFPAGE
}

__END__