package PPI::HTML; =pod =head1 NAME PPI::HTML - Generate syntax-hightlighted HTML for Perl using PPI =head1 SYNOPSIS use PPI; use PPI::HTML; # Load your Perl file my $Document = PPI::Document->load( 'script.pl' ); # Create a reusable syntax highlighter my $Highlight = PPI::HTML->new( line_numbers => 1 ); # Spit out the HTML print $Highlight->html( $Document ); =head1 DESCRIPTION PPI::HTML converts Perl documents into syntax highlighted HTML pages. =head1 HISTORY PPI::HTML is the successor to the now-redundant PPI::Format::HTML. While early on it was thought that the same formatting code might be able to be used for a variety of different types of things (ANSI and HTML for example) later developments with the here-doc code and the need for independantly written serializers meant that this idea had to be discarded. In addition, the old module only made use of the Tokenizer, and had a pretty shit API to boot. =head2 API Overview The new module is much cleaner. Simply create an object with the options you want, pass L objects to the C method, and you get strings of HTML that you can do whatever you want with. =head1 METHODS =cut use 5.005; use strict; use CSS::Tiny (); use PPI::Document (); use PPI::HTML::Fragment (); use Params::Util '_HASH', '_INSTANCE'; use vars qw{$VERSION}; BEGIN { $VERSION = '1.07'; } ##################################################################### # Constructor and Accessors =pod =head2 new %args The C constructor takes a simple set of key/value pairs to define the formatting options for the HTML. =over =item page Is the C option is enabled, the generator will wrap the generated HTML fragment in a basic but complete page. =item line_numbers At the present time, the only option available. If set to true, line numbers are added to the output. =item colors | colours For cases where you don't want to use an external stylesheet, you can provide C as a hash reference where the keys are CSS classes (generally matching the token name) and the values are colours. This allows basic colouring without the need for a whole stylesheet. =item css The C option lets you provide a custom L object containing any CSS you want to apply to the page (if you are using page mode). If both the C and C options are used, the colour CSS entries will overwrite anything contained in the L object. The object will also be cloned if it to be modified, to prevent destroying any CSS objects passed in. =back Returns a new L object =cut sub new { my $class = ref $_[0] ? ref shift : shift; my %args = @_; # Create the basic object my $self = bless { line_numbers => !! $args{line_numbers}, page => !! $args{page}, # colors => undef, # css => undef, }, $class; # Manually specify the class colours and custom CSS $args{colors} = delete $args{colours} if $args{colours}; $self->{colors} = $args{colors} if _HASH($args{colors}); $self->{css} = $args{css} if _INSTANCE($args{css}, 'CSS::Tiny'); $self; } =pod =head2 css The C accessor returns the L object originally provided to the constructor. =cut sub css { $_[0]->{css} } ##################################################################### # Main Methods =pod =head2 html $Document | $file | \$source The main method for the class, the C method takes a single L object, or anything that can be turned into a L via its C method, and returns a string of HTML formatted based on the arguments given to the C constructor. Returns a string, or C on error. =cut sub html { my $self = shift; my $Document = $self->_Document(shift) or return undef; # Build the basic set of fragments $self->_build_fragments($Document) or return undef; # Interleave the line numbers $self->_build_line_numbers or return undef; # Optimise $self->_optimize_fragments or return undef; # Merge and stringify the fragments $self->_build_html or return undef; # Return the final HTML delete $self->{html}; } # Create the basic list of fragments sub _build_fragments { my ($self, $Document) = @_; # Convert the list of tokens to a list of fragments $self->{fragments} = []; $self->{heredoc_buffer} = undef; foreach my $Token ( $Document->tokens ) { # Find the Fragments for the token my @fragments = (); if ( _INSTANCE($Token, 'PPI::Token::HereDoc') ) { @fragments = $self->_heredoc_fragments($Token) or return undef; } else { @fragments = $self->_simple_fragments($Token) or return undef; } # Add the fragments foreach my $Fragment ( @fragments ) { $self->_add_fragment( $Fragment ) or return undef; } } # Are there any trailing heredoc lines to add? if ( $self->{heredoc_buffer} ) { # Unless the last line ends in a newline, add one unless ( $self->{fragments}->[-1]->ends_line ) { my $Fragment = PPI::HTML::Fragment->new( "\n" ) or return undef; push @{$self->{fragments}}, $Fragment; } # Add the remaining buffer lines push @{$self->{fragments}}, @{$self->{heredoc_buffer}}; } # We don't need the heredoc buffer any more delete $self->{heredoc_buffer}; 1; } sub _simple_fragments { my ($self, $Token) = @_; # Split the token content into strings my @strings = grep { defined $_ and length $_ } split /(?<=\n)/, $Token->content; # Convert each string to a fragment my @fragments = (); my $css_class = $self->_css_class( $Token ); foreach my $string ( @strings ) { my $Fragment = PPI::HTML::Fragment->new( $string, $css_class ) or return (); push @fragments, $Fragment; } @fragments; } sub _heredoc_fragments { my ($self, $Token) = @_; # First, create the heredoc content lines and add them # to the buffer foreach my $line ( $Token->heredoc ) { $self->_add_heredoc( $line, 'heredoc_content' ) or return (); } # Add the terminator line $self->_add_heredoc( $Token->terminator . "\n", 'heredoc_terminator' ) or return (); # Return a single fragment for the main content part my $Fragment = PPI::HTML::Fragment->new( $Token->content, $self->_css_class( $Token ) ) or return (); $Fragment; } sub _build_line_numbers { my $self = shift; return 1 unless $self->{line_numbers}; # Find the width of the highest line number, so that # we can pad the line numbers my $max = 1 + scalar map { $_->ends_line } @{$self->{fragments}}; my $width = length("$max"); my $pattern = "\%${width}s: "; # Iterate over the existing array, and insert new line # fragments after each newline. my $line = 1; my @fragments = map { $_->ends_line ? ($_, $self->_line_fragment( sprintf($pattern, ++$line) )) : ($_) } @{$self->{fragments}}; # Add the fragment for line 1 to the beginning unshift @fragments, $self->_line_fragment( sprintf($pattern, 1) ); $self->{fragments} = \@fragments; 1; } sub _build_html { my $self = shift; # Iterate over the loop, stringifying and merging my $html = ''; foreach my $Fragment ( @{$self->{fragments}} ) { $html .= $Fragment->html; } # Page wrap if needed if ( $self->{page} ) { my $css = $self->_css_html; $html = < $css
$html
END_HTML } # Replace the fragments array with the HTML $self->{html} = $html; delete $self->{fragments}; 1; } sub _optimize_fragments { my $self = shift; # Iterate through and do the simplest optimisation layer, # when is joining identical adjacent fragments. my $current = $self->{fragments}; my @fragments = ( shift @$current ); foreach my $Fragment ( @$current ) { if ( $Fragment->css and $fragments[-1]->css and $Fragment->css eq $fragments[-1]->css ) { $fragments[-1]->concat( $Fragment->string ); } else { push @fragments, $Fragment; } } # Remove the class from all whitespace foreach my $Fragment ( @fragments ) { my $css = $Fragment->css or next; $Fragment->clear if $css eq 'whitespace'; } # If we know what classes are coloured, strip the style # from everything that doesn't have a colour. if ( $self->{colors} ) { my $colors = $self->{colors}; foreach my $Fragment ( @fragments ) { my $css = $Fragment->css or next; next if $colors->{$css}; $Fragment->clear; } } # Overwrite the fragments list $self->{fragments} = \@fragments; 1; } # For a set of colors, generate the relevant CSS sub _css_html { my $self = shift; # Create and fill a CSS object my $css = $self->{css} ? $self->{css}->clone : CSS::Tiny->new; foreach my $key ( sort keys %{$self->{colors}} ) { $css->{".$key"}->{color} = $self->{colors}->{$key}; } keys %$css ? $css->html : ''; } ##################################################################### # Support Methods # Create a Document from anything we can sub _Document { my $class = shift; _INSTANCE( $_[0], 'PPI::Document' ) ? $_[0] # Already a Document : PPI::Document->new( @_ ); # Make a Document } # Create a Fragment from anything we can sub _Fragment { my $class = shift; _INSTANCE( $_[0], 'PPI::HTML::Fragment' ) ? $_[0] : PPI::HTML::Fragment->new( @_ ); } sub _add_fragment { my $self = shift; my $Fragment = $self->_Fragment(@_) or return undef; # Add the fragment itself push @{$self->{fragments}}, $Fragment; # If the fragment ends a line, add # anything that is in the heredoc buffer. if ( $self->{heredoc_buffer} and $Fragment->ends_line ) { push @{$self->{fragments}}, @{$self->{heredoc_buffer}}; $self->{heredoc_buffer} = undef; } 1; } sub _add_heredoc { my $self = shift; my $Fragment = $self->_Fragment(@_) or return undef; $self->{heredoc_buffer} ||= []; push @{$self->{heredoc_buffer}}, $Fragment; 1; } sub _line_fragment { my ($self, $line) = @_; PPI::HTML::Fragment->new( $line, 'line_number' ); } sub _css_class { my ($self, $Token) = @_; if ( $Token->isa('PPI::Token::Word') ) { # There are some words we can be very confident are # being used as keywords unless ( $Token->snext_sibling and $Token->snext_sibling->content eq '=>' ) { if ( $Token->content eq 'sub' ) { return 'keyword'; } elsif ( $Token->content eq 'return' ) { return 'keyword'; } elsif ( $Token->content eq 'undef' ) { return 'core'; } elsif ( $Token->content eq 'shift' ) { return 'core'; } elsif ( $Token->content eq 'defined' ) { return 'core'; } } if ( $Token->parent->isa('PPI::Statement::Include') ) { if ( $Token->content =~ /^(?:use|no)$/ ) { return 'keyword'; } if ( $Token->content eq $Token->parent->pragma ) { return 'pragma'; } } elsif ( $Token->parent->isa('PPI::Statement::Variable') ) { if ( $Token->content =~ /^(?:my|local|our)$/ ) { return 'keyword'; } } elsif ( $Token->parent->isa('PPI::Statement::Compond') ) { if ( $Token->content =~ /^(?:if|else|elsif|unless|for|foreach|while|my)$/ ) { return 'keyword'; } } elsif ( $Token->parent->isa('PPI::Statement::Package') ) { if ( $Token->content eq 'package' ) { return 'keyword'; } } elsif ( $Token->parent->isa('PPI::Statement::Scheduled') ) { return 'keyword'; } } # Normal colouring my $css = lc ref $Token; $css =~ s/^.+:://; $css; } 1; =pod =head1 SUPPORT Bugs should always be submitted via the CPAN bug tracker L For other issues, contact the maintainer =head1 AUTHOR Adam Kennedy Ecpan@ali.asE Funding provided by The Perl Foundation =head1 SEE ALSO L, L =head1 COPYRIGHT Copyright (c) 2005, 2006 Adam Kennedy. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. The full text of the license can be found in the LICENSE file included with this module. =cut