# Regex-based scraper for Google News Taiwan; provides get_news and
# get_news_for_topic.
package WWW::Google::News::TW;
use strict;
use warnings;
# Exporter-based interface: nothing is exported by default, both functions
# are listed in @EXPORT_OK and must be requested explicitly by the importer.
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(get_news get_news_for_topic);
our $VERSION = '0.06';
use Carp;
# The subs below create their HTTP client with LWP::UserAgent->new.
use LWP;
# Provides uri_escape, which get_news_for_topic applies to its topic argument.
use URI::Escape;
# get_news()
#
# Downloads the plain-text edition of the Google News Taiwan front page and
# extracts its stories with regular expressions.
#
# Arguments: none.
#
# Returns:
#   - nothing (bare return: undef in scalar context, empty list in list
#     context) when the HTTP response is not successful;
#   - otherwise a hash reference keyed by section name. Each value is an
#     array reference of story hash references with the keys
#     url, headline, source, update_time and summary.
#     The hash is empty when no story pattern matched.
#
# How the parsing works:
#   1. The page body is split on the section-header pattern ($re1). The
#      pattern is wrapped in a capture group, so the matched headers are
#      kept in the resulting list alongside the text between them.
#   2. A piece that matches $re1 updates the current section name (taken
#      from $1 and then cleaned with a substitution).
#   3. Every other piece is split on the story pattern ($re2), again keeping
#      the delimiters; each piece matching $re2 becomes one story built from
#      $1..$5 and is appended to the current section's list.
#   Stories found before the first section header are stored under the
#   empty-string key, because the current section starts out as ''.
#   The inner "my $url" declared for a story masks the request URL declared
#   at the top of the sub; the two are unrelated.
#
# NOTE(review): the pattern literals for $re1 and $re2 below look damaged in
# this copy of the file -- they break across lines and $re2 shows only four
# capture groups although five captures are consumed. Presumably the HTML
# markup they contained was lost; confirm against a pristine copy of the
# module before changing or relying on them.
sub get_news {
# Web version: http://news.google.com.tw/news?ned=tw
# plain text version : http://news.google.com.tw/news?ned=ttw
my $url = 'http://news.google.com.tw/news?ned=ttw';
my $ua = LWP::UserAgent->new;
# Send a browser-like User-Agent header instead of the LWP default.
$ua->agent('Mozilla/5.0');
my $response = $ua->get($url);
# Accumulator for the parsed stories: section name => [ story hashrefs ].
my $results = {};
# Give up quietly on any unsuccessful response; no error is raised.
return unless $response->is_success;
# "»" >>
# my $re1 = '([^<]*)
';
my $re1 = '
(.*?) | ';
my $re2 = '([^<]*)
'.
'([^<]*)'.
'\s?([^<]*)
'.
'([^<]*)...';
# my $re3 = '([^<]*)...';
my @sections = split /($re1)/m,$response->content;
my $current_section = '';
# print STDERR "total num is ".$#sections."\n";
foreach my $section (@sections) {
if ($section =~ m/$re1/m) {
$current_section = $1;
$current_section =~ s/ //g; # or put this (.*?)(?: )? in re1
#print STDERR $1,"\n";
} else {
my @stories = split /($re2)/mi,$section;
foreach my $story (@stories) {
if ($story =~ m/$re2/mi) {
if (!(exists($results->{$current_section}))) {
$results->{$current_section} = [];
}
my $story_h = {};
my( $url, $headline, $source, $update_time, $summary ) = ( $1, $2, $3, $4, $5 );
$story_h->{url} = $url;
$story_h->{headline} = $headline;
$story_h->{source} = $source;
$story_h->{source} =~ s/ -//g;
$story_h->{update_time} = $update_time;
$story_h->{summary} = $summary;
push(@{$results->{$current_section}},$story_h);
}
}
}
}
# print STDERR Dumper($results);
return $results;
}
sub get_news_for_topic {
my $topic = uri_escape( $_[0] );
my @results = ();
my $url = "http://news.google.com.tw/news?hl=zh-TW&ned=ttw&q=$topic";
my $ua = LWP::UserAgent->new();
$ua->agent('Mozilla/5.0');
my $response = $ua->get($url);
return unless $response->is_success;
# print STDERR $url."\n";
my $re1 = '(.*?)©2004 Google |