#=====================================================================
# Monster search backend.
#
# Backend for searching monsterboard (www.monster.com).
# Please read the documentation generated by POD.
#
# Coding conventions:
#   4 space indent (tabs expanded to spaces)
#   80 character max line length.
#
# $Id: Monster.pm,v 1.5 2004/06/26 20:05:21 craigk Exp $
#=====================================================================

package WWW::Search::Monster;

use strict;
use warnings;

use HTML::TreeBuilder;
use WWW::Search;
use WWW::Search::Result;

use vars qw( $VERSION $MAINTAINER @ISA );
@ISA        = qw( WWW::Search );
$VERSION    = '1.03';
$MAINTAINER = 'Craig Kadziolka ';

#---------------------------------------------------------------------
# native_setup_search
#
# initial configuration of the search. all searches are routed
# through http://jobsearch.monster.com regardless of country
# as it is possible to search all countries from this point.
#
# argument 'country' contains the two-letter country code for
# the desired country. The results are undeterminable if the
# country code is invalid or unsupported by monster.
#
# Arguments:
#   $self         - the search object; must provide user_agent().
#   $native_query - the query text, inserted verbatim after "q=".
#   $rhOptsArg    - optional hash reference; only the 'country' key
#                   is read.
#
# Side effects: calls $self->user_agent('non-robot'), fills in
# 'search_base_url' and 'search_base_path' unless the caller already
# set them, and stores the first URL to fetch in $self->{'_next_url'}.
#
# Returns: the URL stored in $self->{'_next_url'}.
#---------------------------------------------------------------------
sub native_setup_search {
    my ($self, $native_query, $rhOptsArg) = @_;

    # The options hash is optional, and so is the country inside it.
    my $sCountryCode = $rhOptsArg ? $rhOptsArg->{'country'} : undef;
    $native_query = '' unless defined $native_query;

    $self->user_agent('non-robot');
    $self->{'search_base_url'}  ||= 'http://jobsearch.monster.com';
    $self->{'search_base_path'} ||= '/jobsearch.asp';

    # Only add the country parameter when a country was actually given;
    # otherwise contribute an empty string rather than undef so the URL
    # is built without "uninitialized value" warnings.
    my $sCountryParam = $sCountryCode ? "&cy=$sCountryCode" : '';

    $self->{'_next_url'} = $self->{'search_base_url'}
                         . $self->{'search_base_path'}
                         . '?'
                         . "q=$native_query$sCountryParam";

    return $self->{'_next_url'};
}

#---------------------------------------------------------------------
# parse_tree
#
# parses a page of results.
# at this point it is quite dependent on the current layout of
# monster.
# assumes that they will have a title row in the results
# table which contains the text "Job Title"
#
# Arguments:
#   $self - the search object. 'search_base_url' is read; 'cache',
#           '_num_hits' and (when a next-page link exists)
#           '_next_url' are updated.
#   $tree - element tree of one results page; it is queried with
#           look_down().
#
# Returns: the number of hits added from this page, or 0 when the
# results table could not be located.
#---------------------------------------------------------------------
sub parse_tree {
    my ($self, $tree) = @_;

    # Locate the heading cell.  When several cells qualify, the last one
    # in document order wins.
    my $heading_cell;
    foreach my $cell ($tree->look_down('_tag' => 'td')) {
        next if $cell == $tree;
        next unless $cell->as_trimmed_text() =~ /^Job Title$/i;

        # The original filter here was an empty regex (//), which in Perl
        # means "reuse the last successful pattern" and so never expressed
        # a real condition.
        # NOTE(review): the lost pattern inspected the cell's markup;
        # skipping layout cells that merely wrap a nested table is the
        # presumed intent -- confirm against the upstream module.
        next if $cell->look_down('_tag' => 'table');

        $heading_cell = $cell;
    }
    return 0 if not $heading_cell;

    # The heading cell's parent is the heading row; one further up should
    # be the table itself.
    my $heading_row = $heading_cell->parent();
    my $table       = $heading_row ? $heading_row->parent() : undef;
    return 0 if not $table;

    my $base = $self->{'search_base_url'};
    $base = '' unless defined $base;

    # Iterate over every row except the heading row, gathering up the
    # job links.
    my $result_count = 0;
    foreach my $row ($table->look_down('_tag' => 'tr')) {
        next if $row == $table;
        next if $row == $heading_row;

        # Columns are: Date{0} Picture(if any){1} Title{2} Company{3}
        #              Location{4}
        my @details = $row->look_down('_tag' => 'td');
        my $title   = $details[2];

        # Spacer or otherwise short rows have no title cell to inspect.
        next if not defined $title;

        # Take the first element inside the title cell that carries an
        # href.  Reading the attribute directly (instead of matching the
        # serialised HTML) avoids picking up "&amp;" escapes in the link.
        my $linked = $title->look_down(sub { defined $_[0]->attr('href') });
        next if not $linked;

        my $hit = WWW::Search::Result->new();
        $hit->add_url($base . $linked->attr('href'));
        $hit->title($title->as_trimmed_text());
        push @{ $self->{cache} }, $hit;
        $self->{'_num_hits'}++;
        $result_count++;
    }

    # try to locate the next link...
    my $next_link_node = $tree->look_down(
        '_tag' => 'a',
        sub { $_[0]->as_trimmed_text() =~ /Next page/i },
    );
    if ($next_link_node) {
        my $next_href = $next_link_node->attr('href');

        # A "Next page" anchor without an href gives nothing to follow,
        # so '_next_url' is left as the caller set it.
        $self->{_next_url} = $base . $next_href if defined $next_href;
    }

    return $result_count;
}

1;

# contributed by Craig Kadziolka

=head1 NAME

WWW::Search::Monster - A backend for searching www.monster.com jobs.

=head1 SYNOPSIS

    my $oSearch = new WWW::Search('Monster');
    my $sQuery = WWW::Search::escape_query("embedded software engineer");
    $oSearch->native_query($sQuery, { country => 'US' });
    while (my $oResult = $oSearch->next_result()) {
        print $oResult->title, "\n";
        print $oResult->url, "\n";
    }

=head1 DESCRIPTION

This class is a specialization of WWW::Search for searching job
advertisements using monster.com. Note that the results will all be
links to monster's own server.

This class exports no public interface; all interaction should be
done through WWW::Search objects.

Works for any country connected to the monster network. It is possible
to specify the country, using the 'country' parameter to the
native_query method of WWW::Search.

=head1 AUTHOR

C<WWW::Search::Monster> is written by Craig Kadziolka.

=head1 COPYRIGHT

Copyright 2004 by Craig Kadziolka. All rights reserved.

This program is free software; you may redistribute it and/or modify
it under the same terms as Perl itself.

=cut