package WWW::Scraper::Lycos; ##################################################################### require Exporter; @EXPORT = qw(); @EXPORT_OK = qw(trimTags); @ISA = qw(WWW::Scraper Exporter); $VERSION = sprintf("%d.%02d", q$Revision: 1.0 $ =~ /(\d+)\.(\d+)/); use Carp (); use WWW::Scraper(qw(3.03 generic_option addURL trimTags trimLFs removeScriptsInHTML)); use strict; # Example query - (old) http://search.lycos.com/main/default.asp?lpv=1&loc=searchhp&query=Perl # (new) http://search.lycos.com/default.asp?lpv=1&loc=searchhp&tab=web&query=turntable my $scraperRequest = { # This engine is driven from it's
page 'type' => 'QUERY' # This is the basic URL on which to get the form to build the query. ,'url' => 'http://search.lycos.com/default.asp?' # specify defaults, by native field names ,'nativeQuery' => 'query' ,'nativeDefaults' => { 'lpv' => '1' ,'loc' => 'searchhp' ,'tab' => 'web' } # specify translations from canonical fields to native fields ,'fieldTranslations' => { '*' => { 'skills' => 'query' ,'*' => '*' } } # Miscellaneous options for the Scraper operation. ,'cookies' => 0 }; my $scraperFrame = [ 'HTML', [ [ 'NEXT', 'Next' ] #Next ,[ 'COUNT', 'Showing\s+Results\s+[\d-]+\s+of\s+([\d,]+)'] ,[ 'BODY', '', '', [ [ 'TR' ] ,[ 'HIT*', # sponsored links [ [ 'TR', [ #[ 'TD', 'number' ] [ 'A', 'url', 'title' ] ,[ 'FONT', 'sponsored' ] ,[ 'BR', 'description' ] ] ] ,[ 'TR' ] ] ] ,[ 'HIT*', # web results [ [ 'BODY', '', '', [ [ 'REGEX', '', 'relevance' ] ,[ 'A', 'url', 'title' ] ,[ 'BR', 'description' ] ] ] ] ] ] ] ] ]; # Lycos has opening tags, but no closing ones. # That seems pretty handy, but gets in the way of TidyXML. sub removeDescriptionTags { my ($self, $hit, $xml) = @_; $$xml =~ s{- (.*?)}{$1}gsi; $$xml =~ s{- }{}gsi; return $xml; } sub testParameters { my ($self) = @_; if ( ref $self ) { $self->{'isTesting'} = 1; } return { 'TODO' => '' ,'testNativeQuery' => 'turntable' ,'expectedOnePage' => 9 ,'expectedMultiPage' => 20 ,'expectedBogusPage' => 1 }; } # Access methods for the structural declarations of this Scraper engine. sub scraperRequest { $scraperRequest } sub scraperFrame { return $scraperFrame; $_[0]->SUPER::scraperFrame($scraperFrame); } 1; __END__ =pod =head1 NAME WWW::Scraper::Lycos - Scrapes www.Lycos.com =head1 SYNOPSIS require WWW::Scraper; $search = new WWW::Scraper('Lycos'); =head1 DESCRIPTION This class is an Lycos specialization of WWW::Search. It handles making and interpreting Lycos searches F. =head1 AUTHOR and CURRENT VERSION C is written and maintained by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD. =head1 COPYRIGHT Copyright (c) 2001 Glenn Wood All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut