package Geo::OSM::MapFeatures; use warnings; use strict; use utf8; use Data::Dumper; use Error; use HTML::TableExtract qw(tree); use LWP::UserAgent; use URI::Escape qw(uri_escape); use Storable; use XML::Simple; use base qw(Class::Accessor); __PACKAGE__->mk_accessors(qw(api_url mapfeatures_pagename trace)); use Geo::OSM::MapFeatures::Feature; =head1 NAME Geo::OSM::MapFeatures - Parses and represents OpenStreetMap Map Features =head1 VERSION Version 0.10 =cut our $VERSION = '0.10'; =head1 SYNOPSIS use Geo::OSM::MapFeatures; my $mf = new Geo::OSM::MapFeatures; $mf->download(); $mf->parse(); # To print a simple ascii representation: foreach my $category ( sort( $mf->categories() ) ){ print "\n\n===== $category =====\n"; foreach my $feature ( $mf->features($category) ){ print "$feature\n"; } } # Or you can choose not to use the string overloading and get the # individual elements yourself: foreach my $category ( sort( $mf->categories() ) ){ print "\n\n===== $category =====\n"; foreach my $feature ( $mf->features($category) ){ print "Key: ".$feature->key()."\n"; print "Value(s): ". join("\n ", @{$feature->values()})."\n"; print "Description: ".$feature->description()."\n\n"; } } =head1 FUNCTIONS =head2 new (constructor) Create a new instance of this class. Pass parameters as a hashref. Parameters: =over 8 =item page What page to fetch. Defaults to "Map_Features". Translated pages work if the table header names are recognized, the module contains a mapping table with translated names in over a dozen languages. =back Returns: new instance of this class. =cut sub new { my ( $pkg, $params ) = @_; my $class = ref $pkg || $pkg; my $self = bless( {}, $class); if( $$params{page} ){ $self->mapfeatures_pagename($$params{page}); } else { $self->mapfeatures_pagename("Map_Features"); } $self->api_url("http://wiki.openstreetmap.org/api.php"); my %tableheader_translations = ( 'En' => { key => 'key', value => 'value', element => 'element', comment => 'comment', }, De => { key => 'Schlüssel', value => 'Wert', element => 'Element', comment => 'Kommentar', }, ES => { key => 'Clave', value => 'Valor', element => 'Elemento', comment => 'Comentario', }, FR => { key => 'Clé', value => 'Valeur', element => 'Élément', comment => 'Commentaire', }, IT => { key => 'Chiave', value => 'Valore', element => 'Elemento', comment => 'Spiegazione', }, Ja => { key => 'キー', value => '値', element => '要素', comment => '説明', }, Hu => { key => 'Kulcs', value => 'Érték', element => 'Alapelem', comment => 'Magyarázat', }, Pt => { key => 'Chave', value => 'Valor', element => 'Element', comment => 'Comentários', }, Ro => { key => 'Cheie', value => 'Valoare', element => 'Element', comment => 'Descriere', }, RU => { key => 'Ключ', value => 'Значение', element => 'Элементы', comment => 'Описание', }, SK => { key => 'Klúč', value => 'Hodnota', element => 'Element', comment => 'Komentár', }, Sv => { key => 'Nyckelord', value => 'Värde', element => 'Element', comment => 'Kommentar', }, Tr => { key => 'Anahtar', value => 'Değer', element => 'Öğe', comment => 'Açıklama', }, Lt => { key => 'Kategorija', value => 'Kodas', element => 'Įvedimo būdai', comment => 'Aprašymas', }, Uk => { key => 'Ключ', value => 'Значення', element => 'Елемент', comment => 'Пояснення', }, Traditional_Chinese => { key => '類別', value => '值', element => '元素', comment => '說明', }, ); # Build and compile regexes with all translations foreach my $string ( qw(key value element comment) ){ my @translations = (); foreach my $language ( values(%tableheader_translations) ){ push(@translations, $$language{$string}); } my $regex_string = join('|', @translations); $self->{tableheader_translation_regexes}{$string} = qr/$regex_string/i; } return $self; } =head2 download Downloads Map Features from wiki.openstreetmap.org. Throws exceptions if something goes wrong. Returns: undef =cut sub download { my $self = shift; # Setup HTTP useragent my $ua = LWP::UserAgent->new; $ua->agent("Geo_OSM_MapFeatures/$Geo::OSM::MapFeatures::VERSION"); # Fetch MW parser output of page my $req = HTTP::Request->new(GET => sprintf("%s?action=parse&prop=text&format=xml&page=%s", $self->api_url, $self->mapfeatures_pagename)); warn "Fetching ".$req->uri."\n" if $self->trace(); my $res = $ua->request($req); if( ! $res->is_success ){ throw Geo::OSM::MapFeatures::Error::Network(sprintf("Couldn't fetch %s: %s", $req->uri, $res->status_line)); } $self->{content} = XMLin($res->content); } =pod =head2 debug_download Download and cache in "mapfeatures.debug" in the current directory, to avoid downloading the page again and again when developing. For example do something like the following: unless( $ENV{MAPFEATURESDEBUG} ){ $mf->download(); } else { $mf->debug_download(); } =cut sub debug_download { my $self = shift; if( -f 'mapfeatures.debug' ){ my $data = retrieve('mapfeatures.debug') or die; $$self{content} = $$data{content}; } else { $self->download(); my $data = {content => $$self{content}}; store($data, 'mapfeatures.debug') or die; } } =head2 parse Parses map features. =cut sub parse { my $self = shift; throw Geo::OSM::MapFeatures::Error("No content, is it downloaded?") unless $self->{content}; throw Geo::OSM::MapFeatures::Error("Couldn't find element, something wrong with api.php?") unless $self->{content}{parse}{text}; my %data; my $tableextractor = HTML::TableExtract->new( # Get header translation regexes with a hash slice headers => [ @{$self->{tableheader_translation_regexes}}{qw(key value element comment)} ], ); $tableextractor->parse($$self{content}{parse}{text}); if( $tableextractor->tables == 0 ){ throw Geo::OSM::MapFeatures::Error::Parse("Did not find any tables"); } #DEBUG: $tableextractor->tree->dump; # Examine all matching tables foreach my $table ($tableextractor->tables) { # Find headings before the table but at the same level. # Loop through in reverse and find the first of each # heading level upwards my @headings = (); my $lowestheading = 10; foreach my $heading_elem ( reverse( grep { $_->tag() =~ /^h(?:\d)$/ } $table->tree->left ) ){ my( $headinglevel ) = $heading_elem->tag() =~ /^h(\d)$/; # Only store the first for a particular level next if defined($headings[$headinglevel]); # Don't store a small heading if we already saw something # larger. For example if we first saw h2 then h3 the h3 # belongs to the previous h2, not this one. next if $#headings && $headinglevel > $lowestheading; $lowestheading = $headinglevel; $headings[$headinglevel] = $heading_elem->as_trimmed_text; } @headings = grep { defined } @headings; my $have_added_in_table_heading; foreach my $row ($table->rows) { # If the first column is spanned it's probably a heading # dividing the table in parts. # Make sure to push exactly the last one of these onto # the list of headings if( $$row[0]->attr('colspan') ){ pop(@headings) if $have_added_in_table_heading; push(@headings, $$row[0]->as_trimmed_text); $have_added_in_table_heading++; next; } my $key = $$row[0]; my $value = $$row[1]; my $element = $$row[2]; my $description = $$row[3]; $key = $key->as_trimmed_text; # Elements are given by images with filenames Mf_(node|way|area).png. # This regex intentionally matches more, to make sure the module can detect that wikifiddlers have "invented" another element type or something my @elementtypes = map { $_->attr('src') =~ /Mf_(\w+)\./ } $element->find('img'); # Find values and split, either by
  • elements or by various text separators my @values; if( $value->find('li') ){ @values = map { $_->as_trimmed_text } $value->find('li'); } else { # Split on "/" (except for 24/7), "or" and "|" @values = split( m{\s*(?:(?as_trimmed_text ); } $description = $description->as_trimmed_text; #DEBUG: print "Row: k='$key' v='".join("','",@values)."' e='".join("','",@elementtypes)."' c='$description'\n"; my $feature = new Geo::OSM::MapFeatures::Feature($key, \@values, \@elementtypes, $description); #FIXME: There should be a real hierarchy, not just a category made by concatenating headings my $headingstring = join(' / ', @headings); push(@{$self->{features}{$headingstring}}, $feature); } } } =head2 categories Returns a list of feature categories. =cut sub categories { my $self = shift; return keys( %{ $self->{features} } ); } =head2 features Returns a list of features. If given an argument it as taken as a category, and only features in that category will be returned. =cut sub features { my $self = shift; my $category = shift; if( defined($category) ){ return @{ $self->{features}{$category} }; } else { my @result = (); foreach my $category ( $self->categories() ){ push(@result, $self->features($category)); } return @result; } } =head1 Exception classes =head2 Geo::OSM::MapFeatures::Error Base exception class for errors thrown by this module =cut package Geo::OSM::MapFeatures::Error; use base qw(Error); sub new { my $self = shift; my $text = "" . shift; my $params = shift; local $Error::Depth = $Error::Depth + 1; $self->SUPER::new(-text => $text, %$params); } sub stringify { my $self = shift; my $text = $self->SUPER::stringify; $text .= sprintf(" at %s line %d.\n", $self->file, $self->line) unless($text =~ /\n$/s); $text; } =head2 Geo::OSM::MapFeatures::Error::Network Network error =cut package Geo::OSM::MapFeatures::Error::Network; our @ISA = qw(Geo::OSM::MapFeatures::Error); =head2 Geo::OSM::MapFeatures::Error::Parse Go find out who broke map feature this time... =cut package Geo::OSM::MapFeatures::Error::Parse; our @ISA = qw(Geo::OSM::MapFeatures::Error); =head1 AUTHOR Knut Arne Bjørndal, C<< >> =head1 BUGS Categories are currently made by concatenating headings above a feature. This should probably be a proper hierarchy instead. The table header translation table should probably be easier to patch from programs calling the module. Or maybe even downloaded from the wiki or something. Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Geo::OSM::MapFeatures You can also look for information at: =over 4 =item * RT: CPAN's request tracker L =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * Search CPAN L =back =head1 ACKNOWLEDGEMENTS =head1 COPYRIGHT & LICENSE Copyright 2008-2009 Knut Arne Bjørndal, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of Geo::OSM::MapFeatures