package HTML::BBCode; =head1 NAME HTML::BBCode - Perl extension for converting BBcode to HTML. =head1 SYNOPSIS use HTML::BBCode; my $bbc = HTML::BBCode->new( \%options ); my $html = $bbc->parse($bbcode); # Input print $bbc->{bbcode}; # Output print $bbc->{html}; =head1 DESCRIPTION C converts BBCode -as used on the phpBB bulletin boards- to its HTML equivalent. Please note that, although this was the first BBCode module, it's by far not the best nor fastest. It's also not heavilly maintained, so you might want to look at L and L. =head2 METHODS The following methods can be used =head3 new my $bbc = HTML::BBCode->new({ allowed_tags => [ @bbcode_tags ], stripscripts => 1, linebreaks => 1, }); C creates a new C object using the configuration passed to it. The object's default configuration allows all BBCode to be converted to the default HTML. =head4 options =over 5 =item allowed_tags Defaults to all currently know C, being: b, u, i, color, size, quote, code, list, url, email, img. With this option, you can specify what BBCode tags you would like to convert. =item stripscripts Enabled by default, this option will remove all the XSS trickery (and thus is probably best not to turn it off). =item no_html This option has been removed since version 2.0 =item no_jslink This option has been removed since version 2.0 =item linebreaks Disabled by default. When true, will substitute linebreaks into HTML ('
') =back =head3 parse my $html = $bbc->parse($bbcode); Parses text supplied as a single scalar string and returns the HTML as a single scalar string. =head1 CAVEAT: API CHANGES Please do note that the C, C, C options in the new method have been removed since version 2.0 due to the XSS protection (provided by L). This will most likely break your current scripts (if you used the C option). =head1 SEE ALSO =over 4 =item * L =item * L =item * L, L, L =back =head1 BUGS C. Please report bugs to L. =head1 AUTHOR Menno Blom, Eblom@cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright (C) 2004-2009 by Menno Blom This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut #------------------------------------------------------------------------------# use strict; use warnings; use HTML::BBCode::StripScripts; our $VERSION = '2.07'; our @bbcode_tags = qw(code quote b u i color size list url email img); sub new { my ($class, $args) = @_; $args ||= {}; $class->_croak("Options must be a hash reference") if ref($args) ne 'HASH'; my $self = {}; bless $self, $class; $self->_init($args) or return undef; return $self; } sub _init { my ($self, $args) = @_; my %html_tags = ( code => '
Code:
'. '
%s
', quote => '
%s
'. '
%s
', b => '%s', u => '%s', i => '%s', color => '%s', size => '%s', url => '%s', email => '%s', img => '', ul => '
    %s
', ol_number => '
    %s
', ol_alpha => '
    %s
', ); my %options = ( allowed_tags=> \@bbcode_tags, html_tags => \%html_tags, stripscripts => 1, linebreaks => 0, %{ $args }, ); $self->{options} = \%options; $self->{'hss'} = HTML::BBCode::StripScripts->new({ Context => 'Flow', AllowSrc => 1, AllowMailto => 1, AllowHref => 1, AllowRelURL => 1, EscapeFiltered => 1, BanAllBut => [qr/a div img li ol span ul/], Rules => { br => 1, img => { required => ['src'], 'src' => 1, 'alt' => 1, '*' => 0, }, a => { required => ['href'], 'href' => 1, '*' => 0, }, img => { 'src' => 1, 'alt' => 1, '*' => 0, }, div => { class => qr{^bbcode_}, '*' => 0, }, span => { style => \&_filter_style, '*' => 0, }, ol => { style => qr/^list-style-type:lower-alpha$/, '*' => 0, }, ul => 1, li => 1, } }); return $self; } # Parse the input! sub parse { my ($self, $bbcode) = @_; return if(!defined $bbcode); $self->{_stack} = []; $self->{_in_code_block} = 0; $self->{_skip_nest} = ''; $self->{_nest_count} = 0; $self->{_nest_count_stack} = 0; $self->{_dont_nest} = ['code', 'url', 'email', 'img']; $self->{bbcode} = ''; $self->{html} = ''; $self->{bbcode} = $bbcode; my $input = $bbcode; main: while(1) { # End tag if($input =~ /^(\[\/[^\]]+\])/s) { my $end = lc $1; if(($self->{_skip_nest} ne '' && $end ne "[/$self->{_skip_nest}]") || ($self->{_in_code_block} && $end ne "[/code]")) { _content($self, $end); } else { _end_tag($self, $end); } $input = $'; } # Opening tag elsif($input =~ /^(\[[^\]]+\])/s ) { if($self->{_in_code_block}) { _content($self, $1); } else { _open_tag($self, $1); } $input = $'; } # None BBCode content till next tag elsif($input =~ /^([^\[]+)/s) { _content($self, $1); $input = $'; } # BUG #14138 unmatched bracket, content till end of input elsif($input =~ /^(.+)$/s) { _content($self, $1); $input = $'; } # Now what? else { last main if(!$input); # We're at the end now, stop parsing! } } $self->{html} = join('', @{$self->{_stack}}); return $self->{options}->{stripscripts} ? $self->_stripscripts() : $self->{html}; } sub _open_tag { my ($self, $open) = @_; my ($tag, $rest) = $open =~ m/\[([^=\]]+)(.*)?\]/s; # Don't do this! ARGH! $tag = lc $tag; if(_dont_nest($self, $tag) && $tag eq 'img') { $self->{_skip_nest} = $tag; } if($self->{_skip_nest} eq $tag) { $self->{_nest_count}++; $self->{_nest_count_stack}++; } $self->{_in_code_block}++ if($tag eq 'code'); push @{$self->{_stack}}, '['.$tag.$rest.']'; } sub _content { my ($self, $content) = @_; $content =~ s|\r*||gs; $content =~ s|\n|
\n|gs if($self->{options}->{linebreaks} && $self->{_in_code_block} == 0); push @{$self->{_stack}}, $content; } sub _end_tag { my ($self, $end) = @_; my ($tag, $arg); my @buf = ( $end ); if("[/$self->{_skip_nest}]" eq $end && $self->{_nest_count} > 1) { push @{$self->{_stack}}, $end; $self->{_nest_count}--; return; } $self->{_in_code_block} = 0 if($end eq '[/code]'); # Loop through the stack while(1) { my $item = pop(@{$self->{_stack}}); push @buf, $item; if(!defined $item) { map { push @{$self->{_stack}}, $_ if($_) } reverse @buf; last; } if("[$self->{_skip_nest}]" eq "$item") { $self->{_nest_count_stack}--; next if($self->{_nest_count_stack} > 0); } $self->{_nest_count}-- if("[/$self->{_skip_nest}]" eq $end && $self->{_nest_count} > 0) ; if($item =~ /\[([^=\]]+).*\]/s) { $tag = $1; if ($tag && $end eq "[/$tag]") { push @{$self->{_stack}}, (_is_allowed($self, $tag)) ? _do_BB($self, @buf) : reverse @buf; # Clear the _skip_nest? $self->{_skip_nest} = '' if(defined $self->{_skip_nest} && $tag eq $self->{_skip_nest}); last; } } } $self->{_nest_count_stack} = 0; } sub _do_BB { my ($self, @buf) = @_; my ($tag, $attr); my $html; # Get the opening tag my $open = pop(@buf); # We prefer to read in non-reverse way @buf = reverse @buf; # Closing tag is kinda useless, pop it pop(@buf); # Rest should be content; my $content = join(' ', @buf); # What are we dealing with anyway? Any attributes maybe? if($open =~ /\[([^=\]]+)=?([^\]]+)?]/) { $tag = $1; $attr = $2; } # Kludgy way to handle specific BBCodes ... if($tag eq 'quote') { $html = sprintf($self->{options}->{html_tags}->{quote}, ($attr) ? "$attr wrote:" : "Quote:", $content ); } elsif($tag eq 'code') { $html = sprintf($self->{options}->{html_tags}->{code}, _code($content)); } elsif($tag eq 'list') { $html = _list($self, $attr, $content); } elsif(($tag eq 'email' || $tag eq 'url') && !$attr) { $html = sprintf($self->{options}->{html_tags}->{$tag}, $content,$content); } elsif ($attr) { $html = sprintf($self->{options}->{html_tags}->{$tag}, $attr, $content); } else { $html = sprintf($self->{options}->{html_tags}->{$tag}, $content); } # Return ... return $html; } sub _is_allowed { my ($self, $check) = @_; map { return 1 if ($_ eq $check); } @{$self->{options}->{allowed_tags}}; return 0; } sub _dont_nest { my ($self, $check) = @_; map { return 1 if($_ eq $check); } @{$self->{_dont_nest}}; return 0; } sub _code { my $code = shift; $code =~ s|^\s+?[\n\r]+?||; $code =~ s|<|\<|g; $code =~ s|>|\>|g; $code =~ s|\[|\[|g; $code =~ s|\]|\]|g; $code =~ s| |\ |g; $code =~ s|\n|
|g; return $code; } sub _list { my ($self, $attr, $content) = @_; $content =~ s|^
[\s\r\n]*|\n|s; $content =~ s|\[\*\]([^\[]+)|_list_removelastbr($1)|egs; $content =~ s|
$|\n|s; if($attr) { return sprintf($self->{options}->{html_tags}->{ol_number}, $content) if($attr =~ /^\d/); return sprintf($self->{options}->{html_tags}->{ol_alpha}, $content) if($attr =~ /^\D/); } else { return sprintf($self->{options}->{html_tags}->{ul}, $content); } } sub _list_removelastbr { my $content = shift; $content =~ s|
[\s\r\n]*$||; $content =~ s|^\s*||; $content =~ s|\s*$||; return "
  • $content
  • \n"; } sub _stripscripts { my $self = shift; $self->{'html'} = $self->{'hss'}->filter_html($self->{'html'}); return $self->{'html'}; } sub _filter_style { my ($filter, $tag, $attr_name, $attr_val) = @_; if ($attr_val eq 'font-weight:bold' or $attr_val eq 'text-decoration:underline' or $attr_val eq 'font-style:italic' or $attr_val eq 'list-style-type') { return $attr_val; } if ( my ($color) = $attr_val =~ /^color:(.*)/ ) { my @html_color = qw/ black gray maroon red green lime olive yellow navy blue purple fuchsia teal aqua silver white /; return $attr_val if $color =~ /^#[a-fA-F\d]{6}$/; return $attr_val if $color =~ /^#[a-fA-F\d]{3}$/; return $attr_val if grep { $color eq $_ } @html_color; return undef; } if ( $attr_val =~ /font-size:\d+px/ ) { return $attr_val; } return undef; } sub _croak { my ($class, @error) = @_; require Carp; Carp::croak(@error); } 1;