###########################################
# File::Comments::Plugin::HTML
# 2005, Mike Schilli
###########################################

###########################################
package File::Comments::Plugin::HTML;
###########################################

use strict;
use warnings;
use File::Comments::Plugin;
use Log::Log4perl qw(:easy);
use HTML::TokeParser;

our $VERSION = "0.01";
our @ISA     = qw(File::Comments::Plugin);

###########################################
sub init {
###########################################
    # Registers the file suffixes this plugin handles with the
    # framework; both lower- and upper-case spellings are listed
    # explicitly.
    my($self) = @_;

    $self->register_suffix(".htm");
    $self->register_suffix(".html");
    $self->register_suffix(".HTML");
    $self->register_suffix(".HTM");
}

###########################################
sub type {
###########################################
    # Returns the type name of this plugin. $target is accepted
    # but not consulted.
    my($self, $target) = @_;

    return "html";
}

###########################################
sub comments {
###########################################
    # Returns a reference to an array holding the comments found
    # in $target->{content}.
    my($self, $target) = @_;

    return $self->extract_html_comments($target);
}

###########################################
sub stripped {
###########################################
    # Returns $target->{content} re-rendered as HTML with its
    # comments removed.
    my($self, $target) = @_;

    return $self->strip_html_comments($target);
}

###########################################
sub extract_html_comments {
###########################################
    # Tokenizes $target->{content} with HTML::TokeParser and
    # collects the text of every comment token.
    #
    # Returns: reference to an array of comment strings, in
    #          document order, without the <!-- --> delimiters.
    my($self, $target) = @_;

    my @comments = ();

    my $stream = HTML::TokeParser->new(\$target->{content});

    while(my $token = $stream->get_token()) {
        next unless $token->[0] eq "C";

        # The "C" token carries the raw source text of the comment,
        # delimiters included. Strip them separately so a comment
        # lacking its terminator still loses the leading marker.
        my $text = $token->[1];
        $text =~ s/\A<!--//;
        $text =~ s/-->\z//;

        push @comments, $text;
    }

    return \@comments;
}

###########################################
sub strip_html_comments {
###########################################
    # Parses $target->{content} into an HTML::TreeBuilder tree and
    # serializes it again. The tree builder is left at its default
    # settings, under which it does not store comments, so they are
    # absent from the output.
    #
    # Returns: the re-rendered HTML string, or the unmodified
    #          content (after logging a warning) if no tree builder
    #          could be created.
    my($self, $target) = @_;

    require HTML::TreeBuilder;
    my $root = HTML::TreeBuilder->new();

    # Check the constructor result before the first method call on it.
    if(!$root) {
        WARN "Cannot parse $target->{path}";
        return $target->{content};
    }

    $root->parse($target->{content});

    # parse() only feeds a chunk; eof() must follow so the builder
    # finishes the tree before it is used.
    $root->eof();

    my $html = $root->as_HTML();

    # Release the tree explicitly once it has been rendered.
    $root->delete();

    return $html;
}

1;

__END__

=head1 NAME

File::Comments::Plugin::HTML - Plugin to detect comments in HTML source code

=head1 SYNOPSIS

    use File::Comments::Plugin::HTML;

=head1 DESCRIPTION

File::Comments::Plugin::HTML is a plugin for the File::Comments framework.

It uses HTML::TokeParser to extract comments from HTML files.

=head1 LEGALESE

Copyright 2005 by Mike Schilli, all rights reserved.
This program is free software, you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 AUTHOR

2005, Mike Schilli

=cut