#!/usr/bin/perl
#
# Scrape the SoftBank pictogram tables (picword_01.php .. picword_06.php),
# collect one record per table row, attach the "sjis_auto" value looked up
# from a local YAML mapping, and dump the full list as YAML on STDOUT.
#
# Each record carries the keys: unicode, sjis, image, number, sjis_auto.
use strict;
use warnings;
use Web::Scraper;
use URI;
use YAML;

# Running sequence number handed out to each scraped row.  It is shared by
# the scraper closures below and keeps counting across all pages.
my $next_number = 1;

my $emoji_scraper = scraper {
    # Select every table row that owns a td/font cell inside the main table.
    process '//table[@width="100%" and @cellpadding="2"]//tr/td/font/../..',
        'emoji[]' => scraper {
            # 264-266 are Skymail and 267-270 are J-PHONE chars, removed
            # from their website -- so jump over 267..270 when we get there.
            if ($next_number == 267) {
                $next_number += 4;
            }

            process '//td[2]/font', unicode => 'TEXT';

            # Store the third cell's text as a hex string.
            process '//td[3]/font', sjis => [
                'TEXT',
                sub {
                    my ($text) = @_;
                    unpack "H*", $text;
                },
            ];

            # Stringify the image source taken from the first cell.
            process '//td[1]/img', image => [
                '@src',
                sub { $_->as_string },
            ];

            # The /td[1] match is only a hook: the filter ignores the cell
            # text and supplies the running counter instead.
            process '//td[1]', number => [
                'TEXT',
                sub { $next_number++ },
            ];
        };
    result 'emoji';
};

# Scrape the six pages in order, appending every page's rows to one list.
my $rows;
for my $page (1 .. 6) {
    my $url = "http://developers.softbankmobile.co.jp/dp/tool_dl/web/picword_0$page.php";
    push @{$rows}, @{ $emoji_scraper->scrape( URI->new($url) ) };
}

fill_sjisauto($rows);

binmode STDOUT, ":utf8";
print Dump($rows);

# fill_sjisauto($rows)
#
# Takes a reference to an array of row hashes and sets each row's
# "sjis_auto" key to the entry found under the row's "unicode" value in
# dat/softbank-unicode2sjis_auto.yaml (path is relative to the current
# working directory).  Rows are modified in place.
sub fill_sjisauto {
    my ($rows) = @_;
    my $sjis_auto_for = YAML::LoadFile('dat/softbank-unicode2sjis_auto.yaml');
    $_->{sjis_auto} = $sjis_auto_for->{ $_->{unicode} } for @{$rows};
}