package WWW::Scraper::NorthernLight;
#####################################################################
# Scraper engine declaration for NorthernLight. The package inherits
# from WWW::Scraper (see @ISA) and supplies the request description,
# the result-page frame and the self-test parameters defined below.
use strict;
use vars qw($VERSION @ISA);
@ISA = qw(WWW::Scraper);
# $VERSION is derived from the revision-control "Revision" keyword: the
# two numeric components are captured and formatted as major.minor with
# a two-digit minor part (revision 1.0 yields "1.00").
$VERSION = sprintf("%d.%02d", q$Revision: 1.0 $ =~ /(\d+)\.(\d+)/);
# Carp is loaded with an empty import list, so none of its functions
# are imported into this package.
use Carp ();
# NOTE(review): the leading 2.27 in the import list is presumably a
# minimum-version request handled by WWW::Scraper's import -- confirm.
use WWW::Scraper(qw(2.27 generic_option addURL trimTags));
# In the visible code FieldTranslation is only referenced from a
# commented-out 'locations' entry in $scraperRequest.
use WWW::Scraper::FieldTranslation;
# Request description for this engine, returned by scraperRequest().
# NOTE(review): the meaning of each key is defined by WWW::Scraper, not
# by this file; the remarks below only restate what the values are.
my $scraperRequest = {
    # Query generation type. The value is 'FORM', presumably meaning the
    # query is submitted through the HTML form named below -- confirm.
    'type'             => 'FORM',
    'formNameOrNumber' => 'powSearch',
    'submitButton'     => 'search',

    # The basic URL on which the query is built.
    'url' => 'http://www.northernlight.com/power.html',

    # Mapping from Scraper attributes to the native input fields.
    'nativeQuery'    => 'qr',
    'nativeDefaults' => {
        'qr' => undef,
    },

    'fieldTranslations' => {
        '*' => {
            'skills' => 'qr',
            # Disabled entries, kept for reference:
            #   'payrate'   => undef
            #   'locations' => new WWW::Scraper::FieldTranslation('NorthernLight', 'Job', 'locations')
            '*' => '*',
        },
    },

    # Further options for the Scraper operation.
    'cookies' => 0,
};
# Declarative frame describing how a NorthernLight result page is taken
# apart. It is a nested array: every node is [ TAG, args..., children ],
# and the structure is handed to WWW::Scraper via scraperFrame().
# NOTE(review): the meaning of each TAG (COUNT, NEXT, HIT*, SNIP, AQ, ...)
# is defined by WWW::Scraper and is not visible in this file.
my $scraperFrame =
[ 'HTML',
    [
        # Total hit count, e.g. "found 10,032,977 items".
        [ 'COUNT', 'found\s+([0-9,]+)\s+items?' ],
        [ 'NEXT',  'alt="Next Page"' ],
        [ 'BODY', '', '',
            [
                [ 'HIT*',
                    [
                        [ 'BODY', '', '',
                            [
                                [ 'TR',
                                    [
                                        [ 'SNIP', '',
                                            [
                                                # First cell: the hit number.
                                                [ 'TD',
                                                    [ [ 'SPAN', 'number' ] ]
                                                ],
                                                # Second cell: link, relevance and descriptive text.
                                                [ 'TD',
                                                    [
                                                        [ 'A', 'url', 'title' ],
                                                        [ 'REGEX', '(\d+)% -', 'relevance' ],
                                                        # NOTE(review): the next three patterns look as if
                                                        # literal HTML delimiters were lost from them (what
                                                        # remains is a space, a newline and nothing at all).
                                                        # Their values are kept exactly as found -- confirm
                                                        # against the upstream distribution.
                                                        [ 'REGEX', '(.*?) ', 'source' ],
                                                        # Same value as the original single-quoted literal,
                                                        # which contained a raw line break.
                                                        [ 'REGEX', "(.*?)\n", 'description' ],
                                                        [ 'REGEX', '(.*?)', 'miscBlock' ],
                                                        [ 'TABLE',
                                                            [
                                                                [ 'TR' ],
                                                                [ 'REGEX', '(\d+)%:', 'secondRelevance' ],
                                                                [ 'A', 'secondUrl', 'secondTitle' ],
                                                            ]
                                                        ],
                                                        # Disabled: [ 'SPAN', 'avail' ]. Per the original note,
                                                        # whatever sits at the top of this cell needs better
                                                        # treatment for this to work. (The tail of that note
                                                        # used to sit on its own line without a '#', which
                                                        # made the file fail to compile.)
                                                        [ 'AQ', 'more\s+results', 'moreResultsUrl', undef ],
                                                    ]
                                                ],
                                            ]
                                        ],
                                    ]
                                ],
                            ]
                        ],
                    ]
                ],
            ]
        ],
    ]
];
# Access methods for the structural declarations of this Scraper engine.
# Accessor: returns the file-scoped request description ($scraperRequest).
sub scraperRequest {
    return $scraperRequest;
}
# Accessor: passes this engine's frame ($scraperFrame) to the parent
# class's scraperFrame method and returns whatever that call returns.
sub scraperFrame {
    my ($self) = @_;
    return $self->SUPER::scraperFrame($scraperFrame);
}
# Accessor: always returns undef; this engine supplies no detail frame.
# An explicit undef (not a bare return) keeps the one-element list that
# callers receive in list context.
sub scraperDetail {
    return undef;
}
# Returns the parameters used when this engine is self-tested.
#
# May be called with an object or a plain class name as the invocant.
# When the invocant is a reference, its 'isTesting' entry is set to 1.
#
# Returns a fresh hash reference holding the SKIP message, the native
# test query and the expected hit counts for the one-page, multi-page
# and bogus-query cases.
sub testParameters {
    my $self = shift;

    # Only a reference can carry the flag; a class-name invocant is left alone.
    $self->{'isTesting'} = 1 if ref $self;

    my %parameters = (
        'SKIP'              => "NorthernLight's search engine seems to be down these days!?",
        'testNativeQuery'   => 'search scraper',
        'expectedOnePage'   => 9,
        'expectedMultiPage' => 12,
        'expectedBogusPage' => 0,
    );
    return \%parameters;
}
1;
=pod
=head1 NAME
WWW::Scraper::NorthernLight - Scrapes NorthernLight.com
=head1 SYNOPSIS
require WWW::Scraper;
$search = new WWW::Scraper('NorthernLight');
=head1 DESCRIPTION
This class is a NorthernLight specialization of WWW::Search.
It handles making and interpreting NorthernLight searches
F<http://www.northernlight.com>.
This class exports no public interface; all interaction should
be done through WWW::Search objects.
=head1 OPTIONS
None at this time (2001.05.06)
=over 8
=item search_url=URL
Specifies who to query with the NorthernLight protocol.
The default is at
http://www.northernlight.com/power.html
=item search_debug, search_parse_debug, search_ref
Specified at L<WWW::Search>.
=back
=head1 SEARCH FIELDS
=head2 displayResultsPerPage - I
=over 8
=item "5" => 5
=item "10" => 10
=item "20" => 20
=item "50" => 50
=item "100" => 100
=back
=head2 postingAge - I
=over 8
=item "0" => any time
=item "1" => 1 day
=item "3" => 3 days
=item "7" => 1 week
=item "8" => 2 weeks
=item "10" => 1 month
=back
=head2 workTermTypeIDs - I
=over 8
=item "1" => Full Time
=item "2" => Part Time
=item "3" => Contract
=item "4" => Temporary/Seasonal
=item "5" => Internship
=back
=head2 countyIDs - I
=over 8
=item "0" => Any
=item "1" => Alameda
=item "2" => Contra Costa
=item "3" => Marin
=item "4" => Napa
=item "5" => San Benito
=item "6" => San Francisco
=item "7" => San Mateo
=item "8" => Santa Clara
=item "9" => Santa Cruz
=item "10" => Solano
=item "11" => Sonoma
=item "12" => Other
=back
=head2 jobPostingCategoryIDs => I
=over 8
=item "0" => Any
=item "1" => Accounting/Finance
=item "2" => Administrative/Clerical
=item "3" => Advertising
=item "4" => Aerospace/Aviation
=item "5" => Agricultural
=item "6" => Architecture
=item "7" => Arts/Entertainment
=item "8" => Assembly
=item "9" => Audio/Visual
=item "10" => Automotive
=item "11" => Banking/Financial Services
=item "12" => Biotechnology
=item "13" => Bookkeeping
=item "14" => Business Development
=item "15" => Child Care Services
=item "16" => Colleges & Universities
=item "17" => Communications/Media
=item "18" => Computer
=item "19" => Computer - Hardware
=item "20" => Computer - Software
=item "21" => Construction
=item "22" => Consulting/Professional Services
=item "23" => Customer Service/Support
=item "24" => Data Entry/Processing
=item "25" => Education/Training
=item "26" => Engineering
=item "27" => Engineering - Civil
=item "28" => Engineering - Hardware
=item "29" => Engineering - Software
=item "30" => Environmental
=item "31" => Executive/Management
=item "32" => Fund Raising/Development
=item "33" => Government/Civil Service
=item "34" => Graphic Design
=item "35" => Health Care/Health Services
=item "36" => Hospitality/Tourism
=item "37" => Human Resources
=item "38" => Information Technology
=item "39" => Insurance
=item "40" => Internet/E-Commerce
=item "41" => Law Enforcement/Security
=item "42" => Legal
=item "43" => Maintenance/Custodial
=item "44" => Manufacturing
=item "45" => Marketing
=item "46" => Miscellaneous
=item "47" => Non-Profit
=item "48" => Pharmaceutical
=item "49" => Printing/Publishing
=item "50" => Property Management/Facilities
=item "51" => Public Relations
=item "74" => Purchasing
=item "52" => QA/QC
=item "53" => Radio/Television/Film/Video
=item "54" => Real Estate
=item "57" => Receptionist
=item "55" => Recruiting/Staffing
=item "56" => Research
=item "58" => Restaurant/Food Service
=item "59" => Retail
=item "60" => Sales
=item "61" => Sales - Inside/Telemarketing
=item "62" => Sales - Outside
=item "63" => Security/Investment
=item "64" => Shipping/Receiving
=item "65" => Social Work/Services
=item "66" => Technical Support
=item "67" => Telecommunications
=item "68" => Training
=item "69" => Transportation
=item "70" => Travel
=item "71" => Warehouse
=item "72" => Web Design
=item "73" => Writer
=back
=head1 AUTHOR
C<WWW::Scraper::NorthernLight> is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.
=head1 COPYRIGHT
Copyright (c) 2001 Glenn Wood
All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut