package WWW::Scraper::Beaucoup;
#####################################################################
use strict;
use vars qw(@ISA $VERSION);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.7 $ =~ /(\d+)\.(\d+)/);
use WWW::Scraper(qw(1.48 trimLFs trimLFLFs));
# SAMPLE
# http://www.Beaucoup.com/js/jobsearch-results.html?loc=CA-San+Jose+Area&cat=Computing%2FMIS-Software+Development&srch=Perl&job=1
# Request description for this engine: the base URL, the native form field
# that carries the query, and the native fields sent by default.
my $scraperRequest = {
    # Query-generation type; this engine uses 'FORM'.
    type => 'FORM',

    # Basic URL on which the query is built.
    url => 'http://Beaucoup.com',

    # Mapping of Scraper attributes onto native input fields.
    nativeQuery    => 'q',
    nativeDefaults => {
        query      => undef,
        'Search.x' => 1,
        'Search.y' => 1,

        # Defaults that are currently switched off, kept for reference:
        #   'phrases' => 'off'
        #   'rpp'     => '10'
        #   'cb'      => 'Beaucoup'
        #   'qtype'   => '0'
        #   'lang'    => '1'
        #   'timeout' => '4'
    },

    # Wildcard translation table: '*' maps '*' onto '*'.
    fieldTranslations => {
        '*' => { '*' => '*' },
    },

    # Further options for the Scraper operation.
    cookies => 0,
};
# Parsing frame handed to WWW::Scraper (see the scraperFrame accessor).
# It is an 'HTML' frame holding three COUNT patterns, a NEXT entry and a
# HIT* entry whose REGEX fills the fields 'url', 'title' and 'description'.
my $scraperFrame =
[ 'HTML',
    [
        # Alternative phrasings of the result-count banner, e.g.
        # "This page shows 1-10 out of a total of 20 results for:"
        [ 'COUNT', 'There are (\d+) results for:' ],
        [ 'COUNT', '\d+-\d+ out of (\d+) for ' ],
        [ 'COUNT', '\d+-\d+ out of a total of (\d+) results for' ],
        [ 'NEXT', 'next' ],
        [ 'HIT*',
            [
                # One capture group per declared field, in order:
                # href target -> url, link text -> title, text up to the
                # following <br> -> description.
                # NOTE(review): the HTML markup in this pattern had been lost
                # from the file (only '\d+\.\s', ']*>(.*?)' and '(.*?)' were
                # left, i.e. two captures for three fields). The tags below
                # are a reconstruction - confirm against a live result page
                # or the released distribution before relying on it.
                [ 'REGEX',
                  '\d+\.\s<a href="([^"]+)"[^>]*>(.*?)</a><br>(.*?)<br>',
                  'url', 'title', 'description' ],

                # Sample hit, as rendered in a browser:
                #   1. sky-scraper.net: best sites for GRAND THEFT AUTO
                #   sky-scraper.net, Visit Casino On Net and receive up to $200 sign up bonus. Home. Sat, 12 Apr 2003 GMT. ...
                #   http://gta.sky-scraper.net/ (Netscape)
                #   Open link in new window
            ]
        ]
    ]
];
# Return the parameter set used when this engine is run under test.
# May be called on an instance or on the class name; the result is a
# reference to a hash with the keys SKIP, testNativeQuery, expectedOnePage,
# expectedMultiPage and expectedBogusPage.
sub testParameters {
    my $self = shift;

    # Only a real object can carry the flag; when $self is not a reference
    # (class-method call) there is nothing to mark.
    $self->{'isTesting'} = 1 if ref $self;

    # isNotTestable is invoked with an explicit empty argument list.
    # NOTE(review): the call sits in list context inside the hash
    # constructor, so the key/value pairing relies on it yielding exactly
    # one value - confirm against WWW::Scraper::TidyXML.
    return {
        'SKIP'              => &WWW::Scraper::TidyXML::isNotTestable(),
        'testNativeQuery'   => 'scraper',
        'expectedOnePage'   => 5,
        'expectedMultiPage' => 20,
        'expectedBogusPage' => 2000,
    };
}
# Access methods for the structural declarations of this Scraper engine.
# Accessor: hand back this engine's request-description structure.
sub scraperRequest {
    return $scraperRequest;
}
# Accessor: pass this engine's frame to the parent class's scraperFrame
# method and return whatever that call returns.
sub scraperFrame {
    my $self = $_[0];
    return $self->SUPER::scraperFrame($scraperFrame);
}
# Accessor: this engine declares no detail structure, so the value is undef
# (a single undef element when called in list context).
sub scraperDetail {
    return undef;
}
1;
__END__
=pod
=head1 NAME
WWW::Scraper::Beaucoup - Scrapes Beaucoup's Super Search
=head1 SYNOPSIS
use WWW::Scraper;
use WWW::Scraper::Response::Job;
$search = new WWW::Scraper('Beaucoup');
$search->setup_query($query, {options});
while ( my $response = $search->next_response() ) {
# $response is a WWW::Scraper::Response::Job.
}
=head1 DESCRIPTION
Beaucoup extends WWW::Scraper.
It handles making and interpreting Beaucoup searches of F<http://Beaucoup.com>.
=head1 OPTIONS
=over 8
=item loc
Many, many strings are allowed. Locations are categorized by state.
See Beaucoup.com for these option values ("3648 locations!" as of June 2001)
=item cat
--- All Categories ---
Clerical/Administrative
Computing/MIS
Customer Service/Support
Education/Training
Engineering
Financial Services
Government/Non Profit
Health Care
Human Resources
Manufacturing/Business Operations
Marketing/Advertising
Media
Other
Professional Services
Sales
Travel/Hospitality
To this you need to add a "-" and the "job function", or
you may specify "All Job Functions in Category" by leaving off the "-" and "job function".
The options for job function are dependent on the Job Category, so
for some of the categories the functions are:
=back
=over 16
=item Clerical/Administrative
Other
=item Computing/MIS
Database Administration
Internet Development
Network/System Administration
Other
Quality Assurance/Testing
Software Development
Systems Analysis
Technical Support/Help Desk
=item Customer Service/Support
Other
=item Education/Training
Colleges/Universities
K to 12 Education
Other
Technical/Trade Schools
Training
=item Engineering
Chemical
Civil
Design/Industrial
Electrical/Hardware
Mechanical
Operations
Other
=item Financial Services
Accounting
Banking
Finance
Insurance
Other
Securities/Asset Management
=item Government/Non Profit
Other
=item Health Care
Administration
Medical
Nursing
Other
Pharmaceutical
=item Human Resources
Other
=item Manufacturing/Business Operations
Construction/Trades
Facilities Management
Logistics/Distribution
Manufacturing
Other
Program/Project Management
Purchasing
=item Marketing/Advertising
Advertising
Market Research
Marketing Communications
Other
Product Management
Public Relations
=item Media
Broadcasting
Graphic Arts/Design
Journalism
Other
Publishing/Technical Writing
=item Other
Other
=item Professional Services
Legal Services
Management Consulting
Other
=item Sales
Account Management
Business Development
Direct Sales
Merchandising/Retail
Other
=item Travel/Hospitality
Other
Restaurant/Food Services
Travel/Recreation/Lodging
=back
=head1 AUTHOR
C<WWW::Scraper::Beaucoup> is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.
=head1 COPYRIGHT
Copyright (c) 2001 Glenn Wood
All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut