package Pod::Xhtml;

use strict;
use Pod::Parser;
use Pod::ParseUtils;
use vars qw/@ISA %COMMANDS %SEQ $VERSION/;

@ISA = qw(Pod::Parser);
($VERSION) = ('$Revision: 1.43 $' =~ m/([\d\.]+)/);

# recognized commands
%COMMANDS = map { $_ => 1 } qw(pod head1 head2 head3 head4 item over back for begin end);

# recognized special sequences
%SEQ = (
    B => \&seqB,
    C => \&seqC,
    E => \&seqE,
    F => \&seqF,
    I => \&seqI,
    L => \&seqL,
    S => \&seqS,
    X => \&seqX,
    Z => \&seqZ,
);

########## New PUBLIC methods for this class

# Return the accumulated output buffer as a string.
sub asString {
    my $self = shift;
    return $self->{buffer};
}

# Return a reference to the accumulated output buffer.
sub asStringRef {
    my $self = shift;
    return \$self->{buffer};
}

# Append text to HeadText; end_pod places it right after the head block.
sub addHeadText {
    my $self = shift;
    $self->{HeadText} .= shift;
}

# Append text to BodyOpenText; end_pod places it just before the body buffer.
sub addBodyOpenText {
    my $self = shift;
    $self->{BodyOpenText} .= shift;
}

# Append text to BodyCloseText; end_pod places it just after the body buffer.
sub addBodyCloseText {
    my $self = shift;
    $self->{BodyCloseText} .= shift;
}

########## Override methods in Pod::Parser
########## PUBLIC INTERFACE

# Clear per-document state, then delegate to Pod::Parser.
sub parse_from_file {
    my $self = shift;
    $self->resetMe;
    $self->SUPER::parse_from_file(@_);
}

# Clear per-document state, then delegate to Pod::Parser.
sub parse_from_filehandle {
    my $self = shift;
    $self->resetMe;
    $self->SUPER::parse_from_filehandle(@_);
}

########## INTERNALS

# Fill in defaults for every option the caller did not supply.
sub initialize {
    my $self = shift;
    # NOTE(review): the default TopLinks value is a bare newline here; the
    # POD describes it as a link back to the top, so the markup appears to
    # have been lost from this literal -- confirm against a pristine copy.
    $self->{TopLinks}     = qq(\n) unless defined $self->{TopLinks};
    $self->{MakeIndex}    = 1      unless defined $self->{MakeIndex};
    $self->{MakeMeta}     = 1      unless defined $self->{MakeMeta};
    $self->{FragmentOnly} = 0      unless defined $self->{FragmentOnly};
    $self->{HeadText} = $self->{BodyOpenText} = $self->{BodyCloseText} = '';
    $self->{LinkParser} ||= Pod::Hyperlink->new;
    $self->{IsFirstCommand} = 1;
    $self->{FirstAnchor}    = "TOP";
    $self->SUPER::initialize();
}

# Pod::Parser callback: parse the command's text and store the paragraph
# in our own parse tree; nothing is rendered until end_pod.
sub command {
    my ($parser, $command, $paragraph, $line_num, $pod_para) = @_;
    my $ptree = $parser->parse_text( $paragraph, $line_num );
    $pod_para->parse_tree( $ptree );
    $parser->parse_tree->append( $pod_para );
}

# Pod::Parser callback: store a verbatim paragraph unparsed.
sub verbatim {
    my ($parser, $paragraph, $line_num, $pod_para) = @_;
    $parser->parse_tree->append( $pod_para );
}

# Pod::Parser callback: parse an ordinary paragraph and store it.
sub textblock {
    my ($parser, $paragraph, $line_num, $pod_para) = @_;
    my $ptree = $parser->parse_text( $paragraph, $line_num );
    $pod_para->parse_tree( $ptree );
    $parser->parse_tree->append( $pod_para );
}

# Pod::Parser callback run once the input is exhausted: classify every
# stored paragraph, render it into $self->{buffer}, add the index and the
# head/body wrapper, and print the result unless StringMode is set.
sub end_pod {
    my $self = shift;
    my $ptree = $self->parse_tree;

    # clean up tree ready for parse
    foreach my $para (@$ptree) {
        if ($para->{'-prefix'} eq '=') {
            $para->{'TYPE'} = 'COMMAND';
        } elsif (! @{$para->{'-ptree'}}) {
            # no parse tree was attached (see verbatim), so use the raw text
            $para->{'-ptree'}->[0] = $para->{'-text'};
            $para->{'TYPE'} = 'VERBATIM';
        } else {
            $para->{'TYPE'} = 'TEXT';
        }
        foreach (@{$para->{'-ptree'}}) {
            unless (ref $_) { s/\n\s+$//; }
        }
    }

    # now loop over each para and expand any html escapes or sequences
    $self->_paraExpand( $_ ) foreach (@$ptree);

    # NOTE(review): the patterns and literals from here down to the head
    # block are kept exactly as found, but several look as if markup was
    # stripped from them (e.g. the '<<>>' placeholder and the empty qq()
    # strings) -- confirm against a pristine copy.
    $self->{buffer} =~ s/(\n?)<\/pre>\s*/$1/sg; # concatenate 'pre' blocks
    1 while $self->{buffer} =~ s/(\s+)<\/pre>/$1/sg;
    $self->{buffer} = $self->_makeIndex . $self->{buffer} if $self->{MakeIndex};
    $self->{buffer} =~ s/<<>>/$self->{FirstAnchor}/ge;
    $self->{buffer} = join "\n", qq[], $self->{buffer}, "";

    # Expand internal L<> links to the correct sections
    $self->{buffer} =~ s/#<<<(.*?)>>>/'#' . $self->_findSection($1)/eg;
    # sanity check: no unexpanded '#<<<' marker may survive the line above
    die "gotcha" if $self->{buffer} =~ /#<<</;

    my $headblock = sprintf "%s\n%s\n\t%s \n",
        qq(),
        qq(\n),
        _htmlEscape( $self->{doctitle} );
    $headblock .= $self->_makeMeta if $self->{MakeMeta};

    unless ($self->{FragmentOnly}) {
        $self->{buffer} = $headblock . $self->{HeadText} . "\n\n" . $self->{BodyOpenText} . $self->{buffer};
        $self->{buffer} .= $self->{BodyCloseText} . "\n\n";
    }

    # in stringmode we only accumulate the XHTML else we print it to the
    # filehandle
    unless ($self->{StringMode}) {
        my $out_fh = $self->output_handle;
        print $out_fh $self->{buffer};
    }
}
########## Everything else is PRIVATE
# Reset every piece of per-document state so that one parser object can be
# reused for several input files. Options and add*Text content are kept.
sub resetMe {
    my $self = shift;

    $self->{'-ptree'} = Pod::ParseTree->new;

    # Stacks and lists that _addCommand/_addSection push onto.
    # listCurrentParas is included so that an unbalanced =over in one
    # document cannot leave stale entries behind for the next one.
    $self->{$_} = [] for qw(sections listKind listHasItems listCurrentParas dataSections);

    # Section name -> id and id -> use-count registries.
    $self->{$_} = {} for qw(section_names section_ids);

    $self->{$_} = 0  for qw(inList titleflag);
    $self->{$_} = '' for qw(buffer doctitle);

    # Same starting values that initialize() sets; without this a second
    # parse would keep the first document's "first anchor" state.
    $self->{IsFirstCommand} = 1;
    $self->{FirstAnchor}    = "TOP";

    return;
}
# Accessor for the parse tree that resetMe creates for each document.
sub parse_tree {
    my ($self) = @_;
    return $self->{'-ptree'};
}
# Render one classified paragraph (TYPE set by end_pod) into the buffer.
#   $para - paragraph object; its '-ptree' elements are overwritten in
#           place with their rendered strings.
# TEXT and VERBATIM paragraphs are dropped while a =begin/=end data
# section is open (dataSections non-empty); COMMAND paragraphs are always
# forwarded to _addCommand.
# NOTE(review): the verbatim wrapper ("" prefix) and the list wrapper
# ("$paragraph ") look as if markup was stripped from the literals --
# confirm against a pristine copy before relying on this output.
sub _paraExpand {
my $self = shift;
my $para = shift;
# collapse interior sequences and strings
foreach ( @{$para->{'-ptree'}} ) {
$_ = (ref $_) ? $self->_handleSequence($_) : _htmlEscape( $_ );
}
# the parse tree has now been collapsed into a list of strings
if ($para->{TYPE} eq 'TEXT') {
return if @{$self->{dataSections}};
$self->_addTextblock( join('', @{$para->{'-ptree'}}) );
} elsif ($para->{TYPE} eq 'VERBATIM') {
return if @{$self->{dataSections}};
my $paragraph = "" . join('', @{$para->{'-ptree'}}) . "\n\n";
# listKind 2 is the kind _addCommand assigns to a list whose first
# =item is not '*'
my $parent_list = $self->{listKind}[-1];
if ($parent_list && $parent_list == 2) {
$paragraph = "$paragraph ";
}
$self->{buffer} .= $paragraph;
# titleflag is raised by _addCommand after a NAME heading; the title is
# still taken from this paragraph, but a warning is issued
if ($self->{titleflag} != 0) {
$self->_setTitle( $paragraph );
warn "NAME followed by verbatim paragraph";
}
} elsif ($para->{TYPE} eq 'COMMAND') {
$self->_addCommand($para->{'-name'}, join('', @{$para->{'-ptree'}}), $para->{'-text'}, $para->{'-line'} )
} else {
warn "Unrecognized paragraph type $para->{TYPE} found at $self->{_INFILE} line $para->{'-line'}\n";
}
}
# Handle one POD command paragraph by appending output to $self->{buffer}
# and updating the list and data-section stacks.
#   $command   - command name; anything not in %COMMANDS is warned about
#                and skipped
#   $paragraph - the command's text after sequence expansion
#   $raw_para  - the paragraph's raw '-text' (only =for looks at it)
#   $line      - input line number, used only in warning messages
# Every command except =end is ignored while a =begin/=end data section
# is open. listKind values: 0 = no =item seen yet in this =over,
# 1 = list whose first =item is '*', 2 = any other list.
# If the command produced an anchor and it is the first to do so, that
# anchor becomes FirstAnchor.
# NOTE(review): many string literals below look as if markup was stripped
# from them, and two places do not parse as shown: the text following the
# bare '#' line in the =item branch, and the line ending in
# "{MakeIndex} >= 2) {". Confirm against a pristine copy.
# NOTE(review): in the =back branch, "$self->{listHasItems} == 0" compares
# the array reference itself with 0, so the "empty list" warning can never
# fire; "$self->{listHasItems}[-1]" was presumably intended -- verify.
# NOTE(review): the =over and =for branches have no 'last'; they fall
# through to the remaining pattern tests, none of which can match.
sub _addCommand {
my $self = shift;
my ($command, $paragraph, $raw_para, $line) = @_;
my $anchor;
unless (exists $COMMANDS{$command}) {
warn "Unrecognized command '$command' skipped at $self->{_INFILE} line $line\n";
return;
}
for ($command) {
my $data_para = @{$self->{dataSections}}; # inside a data paragraph?
/^head1/ && !$data_para && do {
$anchor = $self->_addSection( 'head1', $paragraph );
$self->{buffer} .= qq($paragraph
)
.($self->{TopLinks} ? $self->{TopLinks} : '')."\n\n";
if ($anchor eq 'NAME') { $self->{titleflag} = 1; }
last;
};
/^head([234])/ && !$data_para && do {
my $head_level = $1;
$anchor = $self->_addSection( "head${head_level}", $paragraph );
$self->{buffer} .= qq($paragraph \n\n);
last;
};
/^item/ && !$data_para && do {
unless ($self->{inList}) {
warn "Not in list at $self->{_INFILE} line $line\n";
last;
}
$self->{listHasItems}[-1] = 1;
$self->{listCurrentParas}[-1] = 0;
# is this the first item in the list?
if (@{$self->{listKind}} && $self->{listKind}[-1] == 0) {
my $parent_list = $self->{listKind}[-2]; # this is a sub-list
if ($parent_list && $parent_list == 1) {
# sub lists must be in an - [BEGIN]
$self->{buffer} .= "
- ";
} elsif ($parent_list && $parent_list == 2) {
#
sub lists must be in a - [BEGIN]
$self->{buffer} .= "
- ";
}
if ($paragraph eq '*') {
$self->{listKind}[-1] = 1;
$self->{buffer} .= "
\n";
} else {
$self->{listKind}[-1] = 2;
$self->{buffer} .= "\n";
}
} else {
# close last list item's tag#
if ($self->{listKind}[-1] == 1) {
$self->{buffer} .= "
\n";
}
}
if (@{$self->{listKind}} && $self->{listKind}[-1] == 2) {
$self->{buffer} .= qq(\t- {MakeIndex} >= 2) {
$anchor = $self->_addSection( 'list', $paragraph );
$self->{buffer} .= qq( id="$anchor");
}
$self->{buffer} .= ">";
$self->{buffer} .= qq($paragraph
\n);
}
last;
};
/^over/ && !$data_para && do {
$self->{inList}++;
push @{$self->{listKind}}, 0;
push @{$self->{listHasItems}}, 0;
push @{$self->{sections}}, 'OVER';
push @{$self->{listCurrentParas}}, 0;
};
/^back/ && !$data_para && do {
if (--$self->{inList} < 0) {
warn "=back commands don't balance =overs at $self->{_INFILE} line $line\n";
last;
} elsif ($self->{listHasItems} == 0) {
warn "empty list at $self->{_INFILE} line $line\n";
last;
} elsif (@{$self->{listKind}} && $self->{listKind}[-1] == 1) {
$self->{buffer} .= "\n
\n\n";
} else {
$self->{buffer} .= "\n";
}
my $parent_list = $self->{listKind}[-2]; # this is a sub-list
if ($parent_list && $parent_list == 1) {
# sub lists must be in an - [END]
$self->{buffer} .= "
\n";
}
if ($parent_list && $parent_list == 2) {
# sub lists must be in a - [END]
$self->{buffer} .= "
\n";
}
if ($self->{sections}[-1] eq 'OVER')
{
pop @{$self->{sections}};
} else {
push @{$self->{sections}}, 'BACK';
}
pop @{$self->{listHasItems}};
pop @{$self->{listKind}};
pop @{$self->{listCurrentParas}};
last;
};
/^for/ && !$data_para && do {
my ($html) = $raw_para =~ /^\s*(?:pod2)?x?html\s+(.*)/;
$self->{buffer} .= $html if $html;
};
/^begin/ && !$data_para && do {
my ($ident) = $paragraph =~ /(\S+)/;
push @{$self->{dataSections}}, $ident;
last;
};
/^end/ && do {
my ($ident) = $paragraph =~ /(\S+)/;
unless (@{$self->{dataSections}}) {
warn "no corresponding '=begin $ident' marker at $self->{_INFILE} line $line\n";
last;
}
my $current_section = $self->{dataSections}[-1];
unless ($current_section eq $ident) {
warn "'=end $ident' doesn't match '=begin $current_section' at $self->{_INFILE} line $line\n";
last;
}
pop @{$self->{dataSections}};
last;
};
}
if ($anchor && $self->{IsFirstCommand})
{
$self->{FirstAnchor} = $anchor;
$self->{IsFirstCommand} = 0;
}
}
# Append an ordinary (already expanded) paragraph to the buffer.
#   $paragraph - rendered paragraph text
# If titleflag is set, the paragraph is first handed to _setTitle.
# The wrapper used depends on the innermost list kind: none / 0 (outside a
# list or before its first =item), 1 ('*' list; listCurrentParas tells the
# first paragraph of an item from later ones), anything else (other list).
# NOTE(review): the literals below contain raw line breaks and "- " where
# markup appears to have been stripped -- confirm against a pristine copy.
sub _addTextblock {
my $self = shift;
my $paragraph = shift;
if ($self->{titleflag} != 0) { $self->_setTitle( $paragraph ); }
if (! @{$self->{listKind}} || $self->{listKind}[-1] == 0) {
$self->{buffer} .= "$paragraph
\n\n";
} elsif (@{$self->{listKind}} && $self->{listKind}[-1] == 1) {
if ($self->{listCurrentParas}[-1]++ == 0) {
$self->{buffer} .= "\t
- $paragraph";
} else {
$self->{buffer} .= "\n
$paragraph";
}
} else {
$self->{buffer} .= "\t\t- $paragraph
\n";
}
}
# Render an interior sequence to a string, innermost sequences first.
#   $seq - sequence object with '-name', '-line' and a '-ptree' whose
#          elements are either plain strings or nested sequences
# The children are rendered and concatenated, then the result is passed to
# the handler registered in %SEQ. An unknown sequence name is warned about
# and its rendered content is returned without any wrapper.
sub _handleSequence {
    my ($self, $seq) = @_;

    my $buffer = '';
    $buffer .= ref $_ ? $self->_handleSequence($_) : _htmlEscape($_)
        for @{$seq->{'-ptree'}};

    if (exists $SEQ{$seq->{'-name'}}) {
        return $SEQ{$seq->{'-name'}}->($self, $buffer);
    }

    warn "Unrecognized special sequence '$seq->{'-name'}' skipped at $self->{_INFILE} line $seq->{'-line'}\n";
    return $buffer;
}
# Turn arbitrary text into an id: runs of non-word characters become a
# single underscore, leading/trailing underscores are dropped, repeated
# underscores are collapsed, and the result is cut to 36 characters.
sub _makeIndexId {
    my ($id) = @_;
    for ($id) {
        s/\W+/_/g;
        s/^_+|_+$//g;
        s/__+/_/g;
    }
    return substr($id, 0, 36);
}
# Register a section and return its id.
#   $type    - entry type stored with the section (e.g. 'head1', 'list')
#   $htmlarg - rendered section title; nothing happens if it is undefined
# A title seen before reuses its earlier id. A new title gets an id from
# _makeIndexId; if that id is already taken, "-N" is appended, where N is
# the id's incremented use count. Every call pushes
# [type, id, title] onto $self->{sections}.
sub _addSection {
    my ($self, $type, $htmlarg) = @_;
    return unless defined $htmlarg;

    my $index_id = $self->{section_names}{$htmlarg};
    unless ($index_id) {
        $index_id = _makeIndexId($htmlarg);
        if ($self->{section_ids}{$index_id}) {
            my $count = ++$self->{section_ids}{$index_id};
            $index_id .= "-" . $count;
        }
        else {
            $self->{section_ids}{$index_id}++;
        }
        $self->{section_names}{$htmlarg} = $index_id;
    }

    push( @{$self->{sections}}, [$type, $index_id, $htmlarg]);
    return $index_id;
}
# Resolve a section title to an id: the registered id when the title is
# known, otherwise whatever _makeIndexId derives from the title.
sub _findSection {
    my ($self, $htmlarg) = @_;
    my $index_id = $self->{section_names}{$htmlarg};
    return $index_id if $index_id;
    return _makeIndexId($htmlarg);
}
# Heading level of a sections entry: the number N when the entry is an
# array reference whose first element is "headN", otherwise 0 (this
# includes plain-string markers and non-heading entries).
sub _get_elem_level {
    my ($elem) = @_;
    return 0 unless ref($elem);
    return $elem->[0] =~ /^head(\d+)$/ ? $1 : 0;
}
# Build the index text from $self->{sections} and return it as a string.
# Entries are either array references [type, id, title] (pushed by
# _addSection) or the plain strings 'OVER' / 'BACK' (pushed by
# _addCommand). Heading levels come from _get_elem_level; entries without
# a level inherit the previous one. As a side effect FirstAnchor is set
# to "TOP".
# NOTE(review): most literals below contain raw line breaks and "- " where
# markup appears to have been stripped, and the commented-out statement
# near the end of the loop is split across two lines so that its second
# half (") unless ($type eq 'head1');") is stray code. Confirm against a
# pristine copy.
# NOTE(review): FirstAnchor is assigned twice, and the outer "my $i = 0"
# is shadowed by the loop's own "my $i" and never used.
sub _makeIndex {
my $self = shift;
$self->{FirstAnchor} = "TOP";
my $string = "\nIndex
\n\n";
$self->{FirstAnchor} = "TOP";
my $i = 0;
my $previous_level = 0;
for (my $i=0;$i< @{$self->{sections}} ; $i++)
{
local $_ = $self->{sections}->[$i];
my $next = ($self->{'sections'}->[$i+1] || "");
if (ref $_) {
my ($type, $href, $name) = @$_;
my $index_link = "";
my $next_level = _get_elem_level($next);
my $this_level = _get_elem_level($_) || $previous_level;
if ($this_level < $previous_level)
{
$index_link .=
("
\n \n" x ($previous_level - $this_level));
}
$index_link .= qq(\t- ${name});
if ($next eq "OVER")
{
$index_link .= "
\n";
}
elsif ($next_level > $this_level)
{
$index_link .= "
\n";
$index_link .=
("\n- \n" x ($next_level - $this_level - 1)) .
"
\n";
}
else
{
$index_link .= "
\n";
}
# $index_link = qq($index_link
) unless ($type eq 'head1');
$string .= $index_link;
} elsif ($_ eq 'OVER') {
$string .= qq(\t\n);
} elsif ($_ eq 'BACK') {
$string .= qq(\t
\n
\n);
}
$previous_level = _get_elem_level($_) || $previous_level;
}
$string .=
("
\n\n" x ($previous_level-1)) . "\n";
$string .= "
\n\n\n";
return $string;
}
# Return the text that end_pod appends to the head block when the MakeMeta
# option is set.
# NOTE(review): all five literals are the same tab-plus-newline string and
# $self is never used, which suggests the meta tag markup was stripped
# from them -- confirm against a pristine copy.
sub _makeMeta {
my $self = shift;
return
qq(\t\n)
. qq(\t\n)
. qq(\t\n)
. qq(\t\n)
. qq(\t\n);
}
# Derive the document title from the paragraph following the NAME heading
# and clear titleflag.
#   $paragraph - rendered paragraph text
# The title is the text before the first " - ", else before the first
# ": ", else before the first ".pm", else the first 80 characters.
sub _setTitle {
    my ($self, $paragraph) = @_;

    # Each capture needs at least one character, so a defined $title
    # means the corresponding pattern matched.
    my ($title) = $paragraph =~ m/^(.+?) - /;
    ($title) = $paragraph =~ m/^(.+?): /   unless defined $title;
    ($title) = $paragraph =~ m/^(.+?)\.pm/ unless defined $title;
    $title = substr($paragraph, 0, 80)     unless defined $title;

    $self->{doctitle}  = $title;
    $self->{titleflag} = 0;
}
# Escape the four XHTML-significant characters in a piece of plain text.
#   $txt    - raw text (a copy is modified; the caller's value is untouched)
#   returns - the text with & < > " replaced by entity references
sub _htmlEscape {
    my $txt = shift;
    # '&' must be handled first, otherwise the ampersands introduced by the
    # other replacements would themselves be escaped again.
    $txt =~ s/&/&amp;/g;
    $txt =~ s/</&lt;/g;
    $txt =~ s/>/&gt;/g;
    $txt =~ s/"/&quot;/g;
    return $txt;
}
########## Sequence handlers
# Handlers for the I<>, B<>, C<> and F<> sequences (see %SEQ). Each is
# called as ($self, $text) and returns $text between two string literals.
# NOTE(review): both literals are empty in every handler, so the text comes
# back unchanged; the opening and closing markup was presumably stripped
# from them -- confirm against a pristine copy.
sub seqI { return '' . $_[1] . ''; }
sub seqB { return '' . $_[1] . ''; }
sub seqC { return '' . $_[1] . ''; }
sub seqF { return '' . $_[1] . ''; }
# Handler for the Z<> sequence: it contributes nothing to the output.
sub seqZ {
    return q{};
}
# Handler for the L<> sequence.
#   $link - rendered content of the sequence; it is parsed with the
#           configured LinkParser object
# Four cases: a hyperlink, a link within this page (empty page name), an
# off-page link without alternative text (no '|' in $link), and an
# off-page link with alternative text.
# NOTE(review): the hyperlink result is just $text and $targ is unused in
# the first two branches; the s|Q<...>| and s|P<...>| replacements reduce
# to the captured text; and the last branch builds "text (node in page)".
# These look like literals that lost their markup -- confirm against a
# pristine copy.
sub seqL {
my ($self, $link) = @_;
$self->{LinkParser}->parse( $link );
my $page = $self->{LinkParser}->page;
my $kind = $self->{LinkParser}->type;
my $string = '';
if ($kind eq 'hyperlink') { #easy, a hyperlink
my $targ = $self->{LinkParser}->node;
my $text = $self->{LinkParser}->text;
$string = qq($text);
} elsif ($page eq '') { # a link to this page
# Post-process these links so we can things up to the correct sections
my $targ = $self->{LinkParser}->node;
$string = $self->{LinkParser}->markup;
$string =~ s|Q<(.+?)>|$1|;
} elsif ($link !~ /\|/) { # a link off-page with _no_ alt text
$string = $self->{LinkParser}->markup;
$string =~ s|Q<(.+?)>|$1|;
$string =~ s|P<(.+?)>|$1|;
} else { # a link off-page with alt text
my $text = _htmlEscape( $self->{LinkParser}->text );
my $targ = _htmlEscape( $self->{LinkParser}->node );
$string = "$text (";
$string .= "$targ in " if $targ;
$string .= "$page)";
}
return $string;
}
# Handler for the S<> sequence: the enclosed text must not be broken
# across lines, so every whitespace character becomes a non-breaking
# space entity.
#   $_[1]   - rendered content of the sequence
#   returns - the content with each whitespace character replaced
sub seqS {
    my $text = $_[1];
    # An ordinary space here would leave the text breakable, which defeats
    # the purpose of S<>.
    $text =~ s/\s/&nbsp;/g;
    return $text;
}
# Handler for the X<> sequence: registers the content as a 'head1' section
# via _addSection and returns the content.
# NOTE(review): $anchor is computed but does not appear in the returned
# string; the literal presumably wrapped $arg in markup carrying that id
# before it was stripped -- confirm against a pristine copy.
sub seqX {
my $self = shift;
my $arg = shift;
my $anchor = $self->_addSection( 'head1', $arg );
return qq[$arg];
}
# Handler for the E<> sequence: turn a POD escape into output text.
#   $arg    - content of the sequence (an escape name or a number)
#   returns - '/' for "sol", '|' for "verbar", a numeric character
#             reference for numbers, otherwise "&name;"
# Numbers follow perlpod: a leading "0x" means hexadecimal, a leading "0"
# means octal, anything else is decimal. A bare "x" prefix is also taken
# as hexadecimal because the previous pattern accepted it.
sub seqE {
    my $self = shift;
    my $arg = shift;
    my $rv;
    if ($arg eq 'sol') {
        $rv = '/';
    } elsif ($arg eq 'verbar') {
        $rv = '|';
    } elsif ($arg =~ /^0?[xX]([0-9A-Fa-f]+)$/) {
        $rv = "&#x$1;";
    } elsif ($arg =~ /^0[0-7]+$/) {
        # character references have no octal form, so convert to decimal
        $rv = '&#' . oct($arg) . ';';
    } elsif ($arg =~ /^\d+$/) {
        $rv = "&#$arg;";
    } else {
        $rv = "&$arg;";
    }
    return $rv;
}
1;
__END__
=head1 NAME
Pod::Xhtml - Generate well-formed XHTML documents from POD format documentation
=head1 SYNOPSIS
This module inherits from Pod::Parser, hence you can use this familiar
interface:
use Pod::Xhtml;
my $parser = new Pod::Xhtml;
$parser->parse_from_file( $infile, $outfile );
# or use filehandles instead
$parser->parse_from_filehandle($in_fh, $out_fh);
# or get the XHTML as a scalar
my $parsertoo = new Pod::Xhtml( StringMode => 1 );
$parsertoo->parse_from_file( $infile, $outfile );
my $xhtml = $parsertoo->asString;
# or get a reference to the XHTML string
my $xhtmlref = $parsertoo->asStringRef;
# to parse some other pod file to another output file all you need to do is...
$parser->parse_from_file( $anotherinfile, $anotheroutfile );
There are options specific to Pod::Xhtml that you can pass in at construction
time, e.g.:
my $parser = new Pod::Xhtml(StringMode => 1, MakeIndex => 0);
See L<"OPTIONS">. For more information also see L<Pod::Parser> which this
module inherits from.
=head1 DESCRIPTION
=over 4
=item new Pod::Xhtml( [ OPTIONS ] )
Create a new object. Optionally pass in some options in the form
C<'new Pod::Xhtml( StringMode =E<gt> 1);'>
=item $parser->parse_from_file( INPUTFILE, [OUTPUTFILE] )
Read POD from the input file, output to the output file (or STDOUT if no
file is given). See Pod::Parser docs for more.
Note that you can parse multiple files with the same object. All your options
will be preserved, as will any text you added with the add*Text methods.
=item $parser->parse_from_filehandle( [INPUTFILEHANDLE, [OUTPUTFILEHANDLE]] )
Read POD from the input filehandle, output to the output filehandle
(STDIN/STDOUT if no filehandle(s) given). See Pod::Parser docs for more. Note
that you can parse multiple files with the same object. All your options will
be preserved, as will any text you added with the add*Text methods.
=item $parser->asString
Get the XHTML as a scalar. You'll probably want to use this with the
StringMode option.
=item $parser->asStringRef
As above, but you get a reference to the string, not the string itself.
=item $parser->addHeadText( $text )
Inserts some text just before the closing head tag. For example you can add a
link to a stylesheet. May be called many times to add lots of text. Note: you
need to call this some time B<before> any output is done, e.g. straight after
new(). Make sure that you only insert valid XHTML fragments.
=item $parser->addBodyOpenText( $text ) / $parser->addBodyCloseText( $text )
Inserts some text right at the beginning (or ending) of the body element. For
example you can add a navigation header and footer. May be called many times
to add lots of text. Note: you need to call this some time B<before> any output
is done, e.g. straight after new(). Make sure that you only insert valid XHTML
fragments.
=back
=head1 OPTIONS
=over 4
=item StringMode
Default: 0. If set to 1 this does no output at all, even if filenames/handles
are supplied. Use asString or asStringRef to access the text if you set this
option.
=item MakeIndex
Default: 1. If set to 1 then an index of sections is created at the top of the
body. If set to 2 then the index also includes non-bulleted list items.
=item MakeMeta
Default: 1. If set to 1 then some meta tags are created, recording things like
input file, description, etc.
=item FragmentOnly
Default: 0. If 1, we only produce an XHTML fragment (suitable for use as a
server-side include etc). There is no HEAD element nor any BODY or HTML
tags. Any text added with the add*Text methods will B<not> be output.
=item TopLinks
At each section head this text is added to provide a link back to the top.
Set to 0 or '' to inhibit links, or define your own.
Default:
=item LinkParser
An object that parses links in the POD document. By default, this is a regular
Pod::Hyperlink object. Any user-supplied link parser must conform to the
Pod::Hyperlink API.
=back
=head1 RATIONALE
There's Pod::PXML and Pod::XML, so why do we need Pod::Xhtml? You need an XSLT
to transform XML into XHTML and many people don't have the time or inclination
to do this. But they want to make sure that the pages they put on their web
site are well-formed, they want those pages to use stylesheets easily, and
possibly they want to squirt the XHTML through some kind of filter for more
processing.
By generating well-formed XHTML straight away we allow anyone to just use the
output files as-is. For those who want to use XML tools or transformations they
can use the XHTML as a source, because it's a well-formed XML document.
=head1 CAVEATS
This module outputs well-formed XHTML if the POD is well-formed. To check this
you can use something like:
use Pod::Checker;
my $syn = podchecker($defaultIn);
If $syn is 0 there are no syntax errors. If it's -1 then no POD was found. Any
positive number indicates that that number of errors were found. If the input
POD has errors then the output XHTML I<should> be well-formed but will probably
omit information, and in addition Pod::Xhtml will emit warnings. Note that
Pod::Parser seems to be sensitive to the current setting of $/ so ensure it's
the end-of-line character when the parsing is done.
=head1 AUTHOR
P Kent E<amp> Simon Flack E<lt>cpan _at_ bbc _dot_ co _dot_ ukE<gt>
=head1 COPYRIGHT
(c) BBC 2004, 2005. This program is free software; you can redistribute it
and/or modify it under the GNU GPL.
See the file COPYING in this distribution, or http://www.gnu.org/licenses/gpl.txt
=cut