package Pod::POM::View::HTML::Filter; use Pod::POM::View::HTML; our @ISA = qw( Pod::POM::View::HTML ); use warnings; use strict; use Carp; our $VERSION = '0.09'; my %filter; our %builtin = ( default => { code => sub { my $s = shift; $s =~ s/&/&/g; $s =~ s/</g; $s =~ s/>/>/g; $s; }, verbatim => 1, }, perl_tidy => { code => \&perl_tidy_filter, requires => [qw( Perl::Tidy )], verbatim => 1, alias => [qw( perl )], }, perl_ppi => { code => \&perl_ppi_filter, requires => [qw( PPI PPI::HTML )], verbatim => 1, alias => [qw( ppi )], }, html => { code => \&html_filter, requires => [qw( Syntax::Highlight::HTML )], verbatim => 1, }, shell => { code => \&shell_filter, requires => [qw( Syntax::Highlight::Shell )], verbatim => 1, }, kate => { code => \&kate_filter, requires => [qw( Syntax::Highlight::Engine::Kate )], verbatim => 1, }, wiki => { code => \&wiki_filter, requires => [qw( Text::WikiFormat )], verbatim => 0, }, wikimedia => { code => \&wikimedia_filter, requires => [qw( Text::MediawikiFormat )], verbatim => 0, }, ); # automatically register built-in handlers my $INIT = 1; Pod::POM::View::HTML::Filter->add( %builtin ); $INIT = 0; # # Specific methods # sub new { my $class = shift; return $class->SUPER::new( auto_unindent => 1, @_, filter => {}, # instance filters FILTER => [], # stack maintaining info for filters ); } sub add { my ($self, %args) = @_; my $filter = $self->__filter(); for my $lang ( keys %args ) { my $nok = 0; if( exists $args{$lang}{requires} ) { for ( @{ $args{$lang}{requires} } ) { eval "require $_;"; if ($@) { $nok++; carp "$lang\: pre-requisite $_ could not be loaded" unless $INIT; # don't warn for built-ins } } } croak "$lang: no code parameter given" unless exists $args{$lang}{code}; if ( !$nok ) { $filter->{$lang} = $args{$lang}; if ( $args{$lang}{alias} ) { $filter->{$_} = $args{$lang} for @{ $args{$lang}{alias} }; } } } } sub delete { my ( $self, $lang ) = @_; my $filter = $self->__filter(); my $old = $self->_filter()->{$lang}; $filter->{$lang} = undef; return $old; } # return a hashref of current filters for the class|instance sub _filter { my ($self) = @_; my $filter = ref $self && UNIVERSAL::isa( $self, 'Pod::POM::View::HTML::Filter' ) ? { %filter, %{ $self->{filter} } } : \%filter; $filter->{$_} || delete $filter->{$_} for keys %$filter; return $filter; } # return the real inner filter list for the class|instance sub __filter { my ($self) = @_; return ref $self && UNIVERSAL::isa( $self, 'Pod::POM::View::HTML::Filter' ) ? $self->{filter} : \%filter; } sub know { my ($self, $lang) = @_; return exists $self->_filter()->{$lang}; } sub filters { keys %{ $_[0]->_filter() }; } # # overridden Pod::POM::View::HTML methods # sub view_for { my ($self, $for) = @_; my $format = $for->format; my $filter = $self->_filter(); return $for->text() . "\n\n" if $format =~ /^html\b/; if ( $format =~ /^filter\b/ ) { my $args = (split '=', $format, 2)[1]; return '' unless defined $args; # silently skip my $text = $for->text; my $verbatim = 0; # select the filters and options my @langs; for my $lang (split /\|/, $args) { ( $lang, my $opts ) = ( split( ':', $lang, 2 ), '' ); $opts =~ y/:/ /; $lang = exists $filter->{$lang} ? $lang : 'default'; push @langs, [ $lang, $opts ]; $verbatim++ if $filter->{$lang}{verbatim}; } # cancel filtering if one filter is missing @langs = ( grep { $_->[0] eq 'default' } @langs ) ? ( [ 'default', '' ] ) : @langs; # process the text $text = $filter->{ $_->[0] }{code}->( $text, $_->[1] ) for @langs; return $verbatim ? "
$text\n" : "$text\n"; } # fall-through return ''; } sub view_begin { my ($self, $begin) = @_; my ($format, $args) = split(' ', $begin->format(), 2); my $filter = $self->_filter(); if ( $format eq 'html' ) { return $self->SUPER::view_begin( $begin ); } elsif( $format eq 'filter' ) { my @filters = map { s/^\s*|\s*$//g; $_ } split /\|/, $args; # fetch the text and verbatim blocks in the begin section # and remember the type of each block my $verbatim = 0; my $prev = ''; my $text = ''; for my $item ( @{ $begin->content } ) { $text .= ($prev ? "\n\n" :'') . $item->text(); $prev = 1; $verbatim++ if $item->type eq 'verbatim'; } # a block is verbatim only if all subblocks are verbatim $verbatim = 0 if $verbatim != @{ $begin->content }; # select the filters and options my @langs; for my $f (@filters) { my ( $lang, $opts ) = split( ' ', $f, 2 ); $lang = exists $filter->{$lang} ? $lang : 'default'; push @langs, [ $lang, $opts ]; $verbatim++ if $filter->{$lang}{verbatim}; } # cancel filtering if one filter is missing @langs = ( grep { $_->[0] eq 'default' } @langs ) ? ( [ 'default', '' ] ) : @langs; # process the text ( my $indent, $text ) = _unindent($text) if $self->{auto_unindent}; $text = $filter->{ $_->[0] }{code}->( $text, $_->[1] ) for @langs; $text =~ s/^(?=.+)/$indent/gm if $self->{auto_unindent}; # the enclosing tags depend on the block and the last filter return $verbatim ? "
$text\n" : "$text\n"; } # fall-through return ''; } # # utility functions # # a simple filter output cleanup routine sub _cleanup { local $_ = shift; s!\A
\n?|\n?\n\z!!gm; # remove $_; } sub _unindent { my $str = shift; my $indent; while ( $str =~ /^( *)\S/gmc ) { $indent = !defined $indent ? $1 : length($1) < length($indent) ? $1 : $indent; } $indent ||= ''; $str =~ s/^$indent//gm; return ( $indent, $str ); } # # builtin filters # # a cache for multiple parsers with the same options my %filter_parser; # Perl highlighting, thanks to Perl::Tidy sub perl_tidy_filter { my ($code, $opts) = ( shift, shift || "" ); my $output = ""; # Perl::Tidy 20031021 uses Getopt::Long and expects the default config # this is a workaround (a patch was sent to Perl::Tidy's author) my $glc = Getopt::Long::Configure(); Getopt::Long::ConfigDefaults(); Perl::Tidy::perltidy( source => \$code, destination => \$output, argv => "-html -pre -nopod2html " . $opts, stderr => '-', errorfile => '-', ); $output = _cleanup( $output ); # remove # put back Getopt::Long previous configuration, if needed Getopt::Long::Configure( $glc ); return $output; } # Perl highlighting, thanks to PPI::HTML sub perl_ppi_filter { my ($code, $opts) = ( shift, shift || ''); # PPI::HTML options my %ppi_opt = map { !/=/ && s/$/=1/ ; split /=/, $_, 2 } split / /, $opts; # create PPI::HTML syntax highlighter my $highlighter = $filter_parser{ppi}{$opts} ||= PPI::HTML->new(%ppi_opt); # highlight the code and clean up the resulting HTML my $pretty = $highlighter->html(\$code); $pretty =~ s/
The resulting HTML will look like this (modulo the stylesheet):
# now in full colour! $A++;
$A++; # this works too
This should read bar bar bar:
bar bar bar
=end html =head1 DESCRIPTION This module is a subclass of C
A camel>> and C<<>> tags. Otherwise, no special tags will be added (his is left to the formatter). =head2 Examples An example of the power of pipes can be seen in the following example. Take a bit of Perl code to colour: =begin filter perl "hot cross buns" =~ /cross/; print "Matched: <$`> $& <$'>\n"; # Matched:
"hot cross buns" =~ /cross/; print "Matched: <$`> $& <$'>\n"; # Matched: <hot > cross < buns> print "Left: <$`>\n"; # Left: <hot > print "Match: <$&>\n"; # Match: <cross> print "Right: <$'>\n"; # Right: < buns>=begin html
Which your browser will render as:
"hot cross buns" =~ /cross/; print "Matched: <$`> $& <$'>\n"; # Matched: <hot > cross < buns> print "Left: <$`>\n"; # Left: <hot > print "Match: <$&>\n"; # Match: <cross> print "Right: <$'>\n"; # Right: < buns>=end html Now if you want to colour and number the HTML code produced, it's as simple as tackling the C on top of the C
1 <span class="q">"hot cross buns"</span> =~ <span class="q">/cross/</span><span class="sc">;</span> 2 <span class="k">print</span> <span class="q">"Matched: <$`> $& <$'>\n"</span><span class="sc">;</span> <span class="c"># Matched: <hot > cross < buns></span> 3 <span class="k">print</span> <span class="q">"Left: <$`>\n"</span><span class="sc">;</span> <span class="c"># Left: <hot ></span> 4 <span class="k">print</span> <span class="q">"Match: <$&>\n"</span><span class="sc">;</span> <span class="c"># Match: <cross></span> 5 <span class="k">print</span> <span class="q">"Right: <$'>\n"</span><span class="sc">;</span> <span class="c"># Right: < buns></span>=begin html
But your your browser will render it as:
1 <span class="q">"hot cross buns"</span> =~ <span class="q">/cross/</span><span class="sc">;</span> 2 <span class="k">print</span> <span class="q">"Matched: <$`> $& <$'>\n"</span><span class="sc">;</span> <span class="c"># Matched: <hot > cross < buns></span> 3 <span class="k">print</span> <span class="q">"Left: <$`>\n"</span><span class="sc">;</span> <span class="c"># Left: <hot ></span> 4 <span class="k">print</span> <span class="q">"Match: <$&>\n"</span><span class="sc">;</span> <span class="c"># Match: <cross></span> 5 <span class="k">print</span> <span class="q">"Right: <$'>\n"</span><span class="sc">;</span> <span class="c"># Right: < buns></span>=end html =head2 Caveats There were a few things to keep in mind when mixing verbatim and text paragraphs in a C<=begin> block. These problems do not exist any more as from version 0.06. =over 4 =item Text paragraphs are not processed for POD escapes any more Because the C<=begin> / C<=end> block is now processed as a single string of text, the following block: =begin filter html B
B<foo>=begin html
This will be rendered by your web browser as:
B<foo>=end html And the same text in a verbatim block =begin filter html B
B<foo>=begin html
Which a web browser will render as:
B<foo>=end html Which looks quite the same, doesn't it? =item Separate paragraphs aren't filtered separately any more As seen in L, the filter now processes the begin block as a single string of text. So, if you have a filter that replace each C<*> character with an auto-incremented number in square brackets, like this: $view->add( notes => { code => sub { my ( $text, $opt ) = @_; my $n = $opt =~ /(\d+)/ ? $1 : 1; $text =~ s/\*/'[' . $n++ . ']'/ge; $text; } } ); And you try to process the following block: =begin filter notes 2 TIMTOWDI*, but your library should DWIM* when possible. You can't always claims that PICNIC*, can you? =end filter You'll get the expected result (contrary to previous versions):
TIMTOWDI[2], but your library should DWIM[3] when possible. You can't always claims that PICNIC[4], can you?
The filter was really called only once, starting at C<2>, just like requested. Future versions of C option is a reference to the filter routine. The
filter must take a string as its only argument and return the formatted
HTML string (coloured accordingly to the language grammar, hopefully).
Available options are:
Name Type Content
---- ---- -------
code CODEREF filter implementation
verbatim BOOLEAN if true, force the full content of the
=begin/=end block to be passed verbatim
to the filter
requires ARRAYREF list of required modules for this filter
Note that C is both a class and an instance method.
When used as a class method, the new language is immediately available
for all future and existing instances.
When used as an instance method, the new language is only available for
the instance itself.
=item C
Remove the given language from the list of class or instance filters.
The deleted filter is returned by this method.
C is both a class and an instance method, just like C.
=item C
Return the list of languages supported.
=item C )>
Return true if the view knows how to handle language C<$lang>.
=back
=head2 Overloaded methods
The following C methods are overridden in
C:
=over 4
=item C
The overloaded constructor initialises some internal structures.
This means that you'll have to use a instance of the class as a
view for your C object. Therefore you must use C.
$Pod::POM::DEFAULT_VIEW = 'Pod::POM::View::HTML::Filter'; # WRONG
$pom->present( 'Pod::POM::View::HTML::Filter' ); # WRONG
# this is CORRECT
$Pod::POM::DEFAULT_VIEW = Pod::POM::View::HTML::Filter->new;
# this is also CORRECT
my $view = Pod::POM::View::HTML::Filter->new;
$pom->present( $view );
The only option at this time is C, which is enabled by
default. This option remove leading indentation from all verbatim blocks
within the begin blocks, and put it back after highlighting.
=item C
=item C
These are the methods that support the C format.
=back
=head1 FILTERS
=head2 Built-in filters
C is shipped with a few built-in filters.
The name for the filter is obtained by removing C<_filter> from the
names listed below (except for C):
=over 4
=item default
This filter is called when the required filter is not known by
C. It does nothing more than normal POD
processing (POD escapes for text paragraphs and C<< >> for
verbatim paragraphs.
You can use the C method to remove a filter and therefore
make it behave like C.
=item perl_tidy_filter
This filter does Perl syntax highlighting with a lot of help from
C.
It accepts options to C, such as C<-nnn> to number lines of
code. Check C's documentation for more information about
those options.
=item perl_ppi_filter
This filter does Perl syntax highlighting using C, which is
itself based on the incredible C.
It accepts the same options as C, which at this time solely
consist of C to, as one may guess, add line numbers to the
output.
=item html_filter
This filter does HTML syntax highlighting with the help of
C.
The filter supports C options:
=begin filter html nnn=1
The lines of the HTML code will be numbered.
This is line 2.
=end filter
See C for the list of supported options.
=item shell_filter
This filter does shell script syntax highlighting with the help of
C.
The filter supports C options:
=begin filter shell nnn=1
#!/bin/sh
echo "This is a foo test" | sed -e 's/foo/shell/'
=end filter
See C for the list of supported options.
=item kate_filter
This filter support syntax highlighting for numerous languages
with the help of C.
The filter supports C languages as options:
=begin filter kate Diff
Index: lib/Pod/POM/View/HTML/Filter.pm
===================================================================
--- lib/Pod/POM/View/HTML/Filter.pm (revision 99)
+++ lib/Pod/POM/View/HTML/Filter.pm (working copy)
@@ -27,6 +27,11 @@
requires => [qw( Syntax::Highlight::Shell )],
verbatim => 1,
},
+ kate => {
+ code => \&kate_filter,
+ requires => [qw( Syntax::Highlight::Engine::Kate )],
+ verbatim => 1,
+ },
);
my $HTML_PROTECT = 0;
=end filter
Check the C documentation for the full
list of supported languages. Please note that some of them aren't well
supported yet (by C), so the output
may not be what you expect.
Here is a list of languages we have successfully tested with
C version 0.02:
C, C, C, C, C, C.
=item wiki_filter
This filter converts the wiki format parsed by C
in HTML.
The supported options are: C, C, C,
C. The option and value are separated by a C<=> character,
as in the example below:
=begin filter wiki extended=1
[link|title]
=end
=item wikimedia_filter
This filter converts the wiki format parsed by C
in HTML.
The supported options are: C, C, C,
C and C. The option and value are separated
by a C<=> character.
=back
=head2 Writing your own filters
Write a filter is quite easy: a filter is a subroutine that takes two
arguments (text to parse and option string) and returns the filtered
string.
The filter is added to C's internal filter
list with the C method:
$view->add(
foo => {
code => \&foo_filter,
requires => [],
}
);
When presenting the following piece of pod,
=begin filter foo bar baz
Some text to filter.
=end filter
the C routine will be called with two arguments, like this:
foo_filter( "Some text to filter.", "bar baz" );
If you have a complex set of options, your routine will have to parse
the option string by itself.
Please note that in a C<=for> construct, whitespace in the option string
must be replaced with colons:
=for filter=foo:bar:baz Some text to filter.
The C routine will be called with the same two arguments
as before.
=head1 BUILT-IN FILTERS CSS STYLES
Each filter uses its own CSS classes, so that one can define their
favourite colours in a custom CSS file.
=head2 C filter
C's HTML code looks like:
$A++;
Here are the classes used by C:
n numeric
p paren
q quote
s structure
c comment
v v-string
cm comma
w bareword
co colon
pu punctuation
i identifier
j label
h here-doc-target
hh here-doc-text
k keyword
sc semicolon
m subroutine
pd pod-text
=head2 C filter
C uses the following CSS classes:
comment
double
heredoc_content
interpolate
keyword for language keywords (my, use
line_number
number
operator for language operators
pragma for pragmatas (strict, warnings)
single
structure for syntaxic symbols
substitute
symbol
word for module, function and method names
words
match
=head2 C filter
C makes use of the following classes:
h-decl declaration # declaration
h-pi process # process instruction
h-com comment # comment
h-ab angle_bracket # the characters '<' and '>' as tag delimiters
h-tag tag_name # the tag name of an element
h-attr attr_name # the attribute name
h-attv attr_value # the attribute value
h-ent entity # any entities: é «
=head2 C filter
C makes use of the following classes:
s-key # shell keywords (like if, for, while, do...)
s-blt # the builtins commands
s-cmd # the external commands
s-arg # the command arguments
s-mta # shell metacharacters (|, >, \, &)
s-quo # the single (') and double (") quotes
s-var # expanded variables: $VARIABLE
s-avr # assigned variables: VARIABLE=value
s-val # shell values (inside quotes)
s-cmt # shell comments
=head2 C filter
Output formatted with C makes use
of the following classes:
k-alert # Alert
k-basen # BaseN
k-bstring # BString
k-char # Char
k-comment # Comment
k-datatype # DataType
k-decval # DecVal
k-error # Error
k-float # Float
k-function # Function
k-istring # IString
k-keyword # Keyword
k-normal # Normal
k-operator # Operator
k-others # Others
k-regionmarker # RegionMarker
k-reserved # Reserved
k-string # String
k-variable # Variable
k-warning # Warning
=head1 HISTORY
The goal behind this module was to produce nice looking HTML pages from the
articles the French Perl Mongers are writing for the French magazine
GNU/Linux Magazine France (L ).
The resulting web pages can be seen at
L .
=head1 AUTHOR
Philippe "BooK" Bruhat, C<< >>
=head1 THANKS
Many thanks to Sébastien Aperghis-Tramoni (Maddingue), who helped
debugging the module and wrote C and
C so that I could ship PPVHF with more than
one filter. He also pointed me to C,
which led me to clean up PPVHF before adding support for SHEK.
Perl code examples where borrowed in Amelia,
aka I.
=head1 TODO
There are a few other syntax highlighting modules on CPAN, which I should
try to add support for in C:
=over 4
=item *
C
=item *
C
=item *
C (seems old)
=item *
C
=back
=head1 BUGS
Please report any bugs or feature requests to
C, or through the web interface at
L. I will be notified, and then you'll automatically
be notified of progress on your bug as I make changes.
=head1 COPYRIGHT & LICENSE
Copyright 2004 Philippe "BooK" Bruhat, All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
=cut