package HTML::WikiConverter::GoogleCode; use warnings; use strict; use base 'HTML::WikiConverter'; use Params::Validate ':types'; use URI; =head1 NAME HTML::WikiConverter::GoogleCode - Convert HTML to Google Code wiki markup. =head1 SYNOPSIS use HTML::WikiConverter; my $wc = new HTML::WikiConverter( dialect => 'GoogleCode' ); print $wc->html2wiki( $html ); =head1 DESCRIPTION This module contains rules for converting HTML into Google Code wiki markup. See L for additional usage details. =head1 VERSION Version 0.12 =cut our $VERSION = '0.12'; =head1 FUNCTIONS =head2 rules Returns the HTML to wiki conversion rules =cut sub rules { my %rules = ( p => { block => 1, trim => 'both', line_format => 'multi' }, pre => { block => 1, start => "{{{\n", end => "\n}}}" }, i => { start => "_", end => "_", line_format => 'single' }, em => { alias => 'i' }, b => { start => "*", end => "*", line_format => 'single' }, strong => { alias => 'b' }, #u => { start => '__', end => '__', line_format => 'single' }, sup => { start => '^', end => '^', line_format => 'single' }, sub => { start => ',,', end => ',,', line_format => 'single' }, code => { start => '`', end => '`', line_format => 'single' }, tt => { alias => 'code' }, #small => { start => '~-', end => '-~', line_format => 'single' }, #big => { start => '~+', end => '+~', line_format => 'single' }, a => { replace => \&_link }, img => { replace => \&_image }, ul => { line_format => 'multi', block => 1, line_prefix => ' ' }, ol => { alias => 'ul' }, li => { start => \&_li_start, trim => 'leading' }, #dl => { line_format => 'multi' }, #dt => { trim => 'both', end => ':: ' }, #dd => { trim => 'both' }, hr => { replace => "\n----\n"}, br => { replace => "\n" }, table => { block => 1, line_format => 'multi' }, tr => { end => "||\n", line_format => 'single' }, td => { start => '|| ', end => ' ', trim => 'both' }, th => { alias => 'td' }, ); # Headings (h1-h6) my @headings = ( 1..6 ); foreach my $level ( @headings ) { my $tag = "h$level"; my $affix = ( '=' ) x ($level); $affix = '======' if $level == 6; $rules{$tag} = { start => $affix.' ', end => ' '.$affix, block => 1, trim => 'both', line_format => 'single' }; } return \%rules; } =head2 attributes Returns the conversion L particular to the GoogleCode dialect =cut sub attributes { { escape_autolink => {default => [], type => ARRAYREF}, summary => {default => 0, type => SCALAR}, labels => {default => [], type => ARRAYREF} }} sub _li_start { my( $self, $node, $rules ) = @_; my $bullet = ''; $bullet = '*' if $node->parent->tag eq 'ul'; $bullet = '#' if $node->parent->tag eq 'ol'; return "\n$bullet "; } sub _link { my( $self, $node, $rules ) = @_; # (bug #17813) my $name = $node->attr('name'); my $url = $node->attr('href') || ''; my $text = $self->get_elem_contents($node) || ''; # (bug #17813) if( $self->_abs2rel($url) =~ /^#/ ) { $url = $self->_abs2rel($url); } return $url if $url eq $text; return "[$url $text]"; } sub _abs2rel { my( $self, $uri ) = @_; return $uri unless $self->base_uri; return URI->new($uri)->rel($self->base_uri)->as_string; } sub _image { my( $self, $node, $rules ) = @_; return $node->attr('src') ? ('[' . $node->attr('src') . ']') : ''; } =head2 preprocess_node HTML element specific pre-processing =cut sub preprocess_node { my( $self, $node ) = @_; $self->strip_aname($node) if $node->tag and $node->tag eq 'a'; $self->caption2para($node) if $node->tag and $node->tag eq 'caption'; # (bug #17813) if($node->tag and $node->tag eq 'a' and $node->attr('name') ) { my $name = $node->attr('name'); $node->preinsert( new HTML::Element('a', name => $name) ); $node->attr( name => undef ); } } =head2 preprocess_tree HTML document specific pre-processing =cut sub preprocess_tree { my ($self, $root) = @_; $self->_escape_autolink($root); } # escape Google wiki autolinking of specific CamelCase words # words in attribute escape_autolink sub _escape_autolink { my ($self, $parent) = @_; foreach my $child ($parent->content_list) { if($child->tag eq '~text') { my %toChange; my $theText = $child->attr('text'); while($theText =~ /\b(\w+)\b/g) { if ($self->is_camel_case($1)) { if(grep(/^$1$/, @{$self->escape_autolink})) { $toChange{$1} = undef; } } } foreach my $val (keys %toChange) { $theText =~ s/$val/!$val/g; } $child->attr('text', $theText); } else { unless($child->tag eq 'pre') { $self->_escape_autolink($child); } } } } my @protocols = qw( http https mailto ); my $urls = '(' . join('|', @protocols) . ')'; my $ltrs = '\w'; my $gunk = '\/\#\~\:\.\?\+\=\&\%\@\!\-'; my $punc = '\.\:\?\-\{\(\)\}'; my $any = "${ltrs}${gunk}${punc}"; my $url_re = "\\b($urls:\[$any\]+?)(?=\[$punc\]*\[^$any\])"; =head2 postprocess_output Wiki document post-processing =cut sub postprocess_output { my( $self, $outref ) = @_; $$outref =~ s/($url_re)\[\[BR\]\]/$1 [[BR]]/go; # add 'summary' and 'labels' wiki markup elements my $additional_markup = ''; if($self->summary) { $additional_markup = '#summary ' . $self->summary . "\n"; } if(@{$self->labels}) { $additional_markup .= '#labels ' . join(',', @{$self->labels}) . "\n"; } $$outref = $additional_markup . $$outref; } =head1 ATTRIBUTES In addition to the regular set of attributes recognized by the HTML::WikiConverter constructor, this dialect also accepts the following attributes that can be passed into the C constructor. See L for usage details. =head2 escape_autolink A reference to an array of CamelCase words for which Google Code wiki autolink-ing should be escaped by preceeding the word with a !. =head2 summary Text to be produced in the 'summary' wiki markup element. The summary element appears in the index page of the project's wiki. =head2 labels A reference to an array of text values to be produced in the 'labels' wiki markup element. Allowed values for a project can be found on the project's Google Code web-site on the C tab. =head1 AUTHOR Marty Kube, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc HTML::WikiConverter::GoogleCode You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 ACKNOWLEDGEMENTS This module is based on the L module by David J. Iberri, C<< >> =head1 COPYRIGHT & LICENSE Copyright 2008 Marty Kube, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of HTML::WikiConverter::GoogleCode