package Syntax::Highlight::HTML;
use strict;
use HTML::Parser;

# Package globals: the module version and the HTML::Parser base class.
our $VERSION = '0.04';
our @ISA     = qw(HTML::Parser);

=head1 NAME

Syntax::Highlight::HTML - Highlight HTML syntax

=head1 VERSION

Version 0.04

=cut

# CSS class name used for each kind of highlighted token.
my %classes = (
    declaration   => 'h-decl',  # declaration
    process       => 'h-pi',    # process instruction
    comment       => 'h-com',   # comment
    angle_bracket => 'h-ab',    # the characters '<' and '>' as tag delimiters
    tag_name      => 'h-tag',   # the tag name of an element
    attr_name     => 'h-attr',  # the attribute name
    attr_value    => 'h-attv',  # the attribute value
    entity        => 'h-ent',   # any entities: &eacute; &laquo;
    line_number   => 'h-lno',   # line number
);

# Options accepted by the constructor, with their default values.
my %defaults = (
    pre => 1,   # add <pre>...</pre> around the result? (default: yes)
    nnn => 0,   # add line numbers (default: no)
);

=head1 SYNOPSIS

    use Syntax::Highlight::HTML;

    my $highlighter = Syntax::Highlight::HTML->new;
    $output = $highlighter->parse($html);

If C<$html> contains the following HTML fragment:

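
    <!-- a description list -->
    <dl compact="compact">
      <dt>some word</dt>
      <dd>the description of the word. Plus some <a href="/definitions/other_word"
      >reference</a> towards another definition. </dd>
    </dl>
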
then the resulting HTML contained in C<$output> will render like this:

=begin html

    <!-- a description list -->
    <dl compact="compact">
      <dt>some word</dt>
      <dd>the description of the word. Plus some <a href="/definitions/other_word"
      >reference</a> towards another definition. </dd>
    </dl>

=end html

=head1 DESCRIPTION

This module is designed to take raw HTML input and highlight it (using a CSS
stylesheet, see L</NOTES> for the classes). The returned HTML code is ready
for inclusion in a web page.

It is intended to be used as a highlighting filter, and as such does not
reformat or reindent the original HTML code.

=head1 METHODS

=over 4

=item new()

The constructor. Returns a C<Syntax::Highlight::HTML> object, which derives
from C<HTML::Parser>. As such, any C<HTML::Parser> method can be called on
this object (that is, except for C<parse()>, which is overridden here).

B<Options>

=over 4

=item * C<nnn> - Activate line numbering. Default value: 0 (disabled).

=item * C<pre> - Surround result by C<< <pre>...</pre> >> tags. Default value: 1 (enabled).

=back

B<Example>

To avoid surrounding the result by the C<< <pre>...</pre>
>> tags:

    my $highlighter = Syntax::Highlight::HTML->new(pre => 0);

=cut

# Constructor. Builds the underlying HTML::Parser object with the handlers
# that produce the highlighted output, reblesses it into the caller's class,
# and stores the options (defaults overridden by defined caller values).
sub new {
    my ($invocant, %args) = @_;
    my $class = ref $invocant || $invocant;

    # Argument specifications handed to HTML::Parser for the two handlers.
    my $tag_argspec  = 'self, event, tagname, attr, text';
    my $text_argspec = 'self, text';

    my $self = __PACKAGE__->SUPER::new(
        # API version
        api_version    => 3,
        # Options
        case_sensitive => 1,
        attr_encoded   => 1,
        # Handlers: markup events go to _highlight_tag ...
        ( map { ( "${_}_h" => [ \&_highlight_tag,  $tag_argspec  ] ) }
              qw(declaration process comment start end) ),
        # ... text and every other event go to _highlight_text
        ( map { ( "${_}_h" => [ \&_highlight_text, $text_argspec ] ) }
              qw(text default) ),
    );
    bless $self, $class;

    # Start from the defaults; a caller value is used only when it is defined.
    my %options = %defaults;
    for my $name (grep { defined $args{$_} } keys %defaults) {
        $options{$name} = $args{$name};
    }

    $self->{options} = \%options;
    $self->{output}  = '';

    return $self;
}

=item parse()

Parse the HTML code given in argument and return the highlighted HTML code,
ready for inclusion in a web page.

B<Example>

    $highlighter->parse("
        <p>Hello, world.</p>
");

=cut

# Highlight one HTML document or fragment.
#
# Parameters: $html - the HTML source text (undef is treated as '').
# Returns:    the highlighted markup, also left in $self->{output}.
sub parse {
    my ($self, $html) = @_;
    $html = '' unless defined $html;

    ## parse the HTML fragment; the handlers append to $self->{output}
    $self->{output} = '';
    $self->SUPER::parse($html);
    $self->eof;

    ## add line numbering?
    if ($self->{options}{nnn}) {
        my $line_number = 0;
        $self->{output} =~ s{^}{
            _span(line_number => sprintf('%3d', ++$line_number)) . ' '
        }gme;
    }

    ## add <pre>...</pre>?
    $self->{output} = "<pre>\n" . $self->{output} . "</pre>\n"
        if $self->{options}{pre};

    return $self->{output};
}

=back

=head2 Internal Methods

The following methods are for internal use only.

=over 4

=item _highlight_tag()

C<HTML::Parser> tags handler: highlights a tag. It is registered for the
C<declaration>, C<process>, C<comment>, C<start> and C<end> events.

=cut

# Parameters (as requested from HTML::Parser by the constructor):
#   $self, $event, $tagname, $attr, $text
# The attribute hash is deliberately ignored: attributes are re-read from
# the raw tag text so that their order, spacing and quoting are kept as is.
sub _highlight_tag {
    my ($self, $event, $tagname, undef, $text) = @_;
    return unless defined $text;

    # Declarations, processing instructions and comments are wrapped whole.
    if ($event eq 'declaration' or $event eq 'process' or $event eq 'comment') {
        $self->{output} .= _span($event => _markup_text($text));
        return;
    }

    # Start and end tags: '<' or '</', the tag name, the attributes, and
    # the closing '>' or '/>'.
    if (defined $tagname
        and $text =~ m{\A < (/?) ( (?i: \Q$tagname\E ) ) (.*?) (/?) > \z}xs)
    {
        my ($slash, $name, $attributes, $closing) = ($1, $2, $3, $4);
        $self->{output} .=
              _span(angle_bracket => "&lt;$slash")
            . _span(tag_name      => _markup_text($name))
            . _markup_attributes($attributes)
            . _span(angle_bracket => "$closing&gt;");
    }
    else {
        # Not shaped like a complete tag: emit it escaped but unhighlighted.
        $self->{output} .= _markup_text($text);
    }

    return;
}

=item _highlight_text()

C<HTML::Parser> text handler: highlights text. It is registered for the
C<text> event and as the default handler. Entities are highlighted and the
characters C<< < >>, C<< > >> and C<&> are escaped.

=cut

# Parameters: $self, $text (the raw text reported by the parser).
sub _highlight_text {
    my ($self, $text) = @_;
    return unless defined $text;

    $self->{output} .= _markup_text($text);
    return;
}

# Escaped forms of the characters that are special in HTML.
my %escape_for = ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' );

# Wrap $markup (which must already be escaped) in a <span> whose class is
# the one registered in %classes under $kind.
sub _span {
    my ($kind, $markup) = @_;
    return qq|<span class="$classes{$kind}">$markup</span>|;
}

# Escape raw source text for display and highlight the entities it contains.
# This is done in a single pass so that the markup inserted for an entity is
# never escaped again.
sub _markup_text {
    my ($text) = @_;
    $text =~ s{ & ( \#? \w+ ; ) | ( [&<>] ) }
              { defined $1 ? _span(entity => "&amp;$1") : $escape_for{$2} }gex;
    return $text;
}

# Highlight the attribute part of a start tag. Every name=value pair
# (quoted or unquoted value) gets a name span and a value span, the quotes
# being part of the value span. Everything else (white space, attributes
# without a value, stray characters) is only escaped.
sub _markup_attributes {
    my ($attributes) = @_;
    my $markup = '';

    while ($attributes =~ m{
        \G (?:
              ( [^\s=/<>&"']+ ) ( \s* = \s* )
              ( " [^"]* " | ' [^']* ' | [^\s"'<>]+ )
            | ( [^\s=/<>&"']+ | & \#? \w+ ; | . )
        )
    }gxs) {
        # Copy the captures before calling helpers that run their own regexes.
        my ($name, $equals, $value, $other) = ($1, $2, $3, $4);
        $markup .= defined $name
            ? _span(attr_name => $name) . $equals
              . _span(attr_value => _markup_text($value))
            : _markup_text($other);
    }

    return $markup;
}

=back

=head1 NOTES

The resulting HTML uses CSS to colourize the syntax. Here are the classes
that you can define in your stylesheet.

=over 4

=item *

C<.h-decl> - for a markup declaration; in an HTML document, the only markup
declaration is the C<DOCTYPE>, like:
C<< <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> >>

=item *

C<.h-pi> - for a process instruction like C<< <?php ... ?> >>
or C<< <?xml ... ?> >>

=item *

C<.h-com> - for a comment, C<< <!-- like this --> >>

=item *

C<.h-ab> - for the characters C<< '<' >> and C<< '>' >> as tag delimiters

=item *

C<.h-tag> - for the tag name of an element

=item *

C<.h-attr> - for the attribute name

=item *

C<.h-attv> - for the attribute value

=item *

C<.h-ent> - for any entities: C<&eacute;> C<&laquo;>

=item *

C<.h-lno> - for the line numbers

=back

An example stylesheet is provided with the distribution.

=head1 EXAMPLE

Here is an example of generated HTML output. It was generated with a small
script based on this module.

The following HTML fragment (which is the beginning of a search.cpan.org
page titled "search.cpan.org: SE<eacute>bastien Aperghis-Tramoni")
will be rendered like this (using the example CSS stylesheet):

=begin html

  1 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  2 <html>
  3  <head>
  4   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  5   <link rel="stylesheet" href="/s/style.css" type="text/css">
  6   <title>search.cpan.org: S&#233;bastien Aperghis-Tramoni</title>
  7  </head>
  8  <body id="cpansearch">
  9 <center><div class="logo"><a href="/"><img src="/s/img/cpan_banner.png" alt="CPAN"></a></div></center>
 10 <div class="menubar">
 11  <a href="/">Home</a>
 12 &middot; <a href="/author/">Authors</a>
 13 &middot; <a href="/recent">Recent</a>
 14 &middot; <a href="/news">News</a>
 15 &middot; <a href="/mirror">Mirrors</a>
 16 &middot; <a href="/faq.html">FAQ</a>
 17 &middot; <a href="/feedback">Feedback</a>
 18 </div>
 19 <form method="get" action="/search" name="f" class="searchbox">
 20 <input type="text" name="query" value="" size="35">
 21 <br>in <select name="mode">
 22  <option value="all">All</option>
 23  <option value="module" >Modules</option>
 24  <option value="dist" >Distributions</option>
 25  <option value="author" >Authors</option>
 26 </select>&nbsp;<input type="submit" value="CPAN Search">
 27 </form>

=end html

=head1 CAVEATS

C<Syntax::Highlight::HTML> relies on C<HTML::Parser> for parsing the HTML
and therefore suffers from the same limitations.

=head1 SEE ALSO

L<HTML::Parser>

=head1 AUTHORS

SE<eacute>bastien Aperghis-Tramoni, E<lt>sebastien@aperghis.netE<gt>

=head1 BUGS

Please report any bugs or feature requests to
C<bug-syntax-highlight-html@rt.cpan.org>, or through the web interface at
L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Syntax-Highlight-HTML>.
I will be notified, and then you'll automatically be notified of progress
on your bug as I make changes.

=head1 COPYRIGHT & LICENSE

Copyright (C)2004 SE<eacute>bastien Aperghis-Tramoni, All Rights Reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut

1; # End of Syntax::Highlight::HTML