package WWW::Google::News::TW;
use strict;
use warnings;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(get_news get_news_for_topic);
our $VERSION = '0.01';
use Carp;
use LWP;
use URI::Escape;
sub get_news {
# Web version: http://news.google.com.tw/news?ned=tw
# plain text version : http://news.google.com.tw/news?ned=ttw
my $url = 'http://news.google.com.tw/news?ned=ttw';
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
my $response = $ua->get($url);
my $results = {};
return unless $response->is_success;
# "»" >>
# my $re1 = '(.*?)';
my $re2 = '([^<]*)
';
my @sections = split /($re1)/m,$response->content;
my $current_section = '';
# print STDERR "total num is ".$#sections."\n";
foreach my $section (@sections) {
if ($section =~ m/$re1/m) {
$current_section = $1;
$current_section =~ s/ //g; # or put this (.*?)(?: )? in re1
#print STDERR $1,"\n";
} else {
my @stories = split /($re2)/mi,$section;
foreach my $story (@stories) {
if ($story =~ m/$re2/mi) {
if (!(exists($results->{$current_section}))) {
$results->{$current_section} = [];
}
my $story_h = {};
$story_h->{url} = $1;
$story_h->{headline} = $2;
push(@{$results->{$current_section}},$story_h);
}
}
}
}
# print STDERR Dumper($results);
return $results;
}
1;
__END__
=head1 NAME
WWW::Google::News::TW - Access to Google's Taiwan News Service (Not Usenet)
=head1 SYNOPSIS
use WWW:Google::News::TW qw(get_news);
my $results = get_news();
=head1 DESCRIPTION
This module provides a couple of methods to scrape results from Google Taiwan News, returning
a data structure similar to the following (which happens to be suitable to feeding into XML::RSS).
{
'社會' => [
{
'url' => 'http://udn.com/NEWS/LIFE/LIFS2/2233728.shtml',
'headline' => '中華電配線費四樓以下建物免收'
},
],
'焦點' => [
{
'url' => 'http://www.ettoday.com/2004/09/09/153-1683695.htm',
'headline' => '來去嘉義/阿里山鐵路接駁通車走走停停別有風味'
},
],
}
=head1 METHODS
=over 4
=item get_news()
Scrapes L and returns a reference
to a hash keyed on News Section, which points to an array of hashes keyed on URL and Headline.
=head1 TODO
* Add topic search
=head1 AUTHORS
Cheng-Lung Sung Eclsung@dragon2.netE
=head1 KUDOS
Greg McCarroll , Bowen Dwelle
for the basis of this module
=head1 COPYRIGHT
Copyright 2004 by Cheng-Lung Sung Eclsung@dragon2.netE.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
See L
=head1 SEE ALSO
L, L
=cut