package Pangloss::HTML::Stripper;
# shamelessly stolen from HTML::Parser's 'htext' example and adapted.
use strict;
use HTML::Parser 3.00 ();
use base qw( Pangloss::Object );
# Reduce an HTML document to its plain text.
#
# Takes the HTML either as a string or as a reference to a string.  The
# document is run through HTML::Parser; the tag() and text() handlers build
# up the result in $self->{text}.  Returns that accumulated text and removes
# it from the object (undef if the handlers never produced anything).
sub strip {
    my ($self, $html) = @_;
    $html = $$html if ref($html);

    # Start from a clean slate: reset the per-tag nesting counters and drop
    # any text left behind by an earlier run that did not finish.
    $self->{inside} = {};
    delete $self->{text};

    my $parser = HTML::Parser->new(
        api_version => 3,
        handlers    => [
            start => [sub { $self->tag(@_) },  "tagname, '+1'"],
            end   => [sub { $self->tag(@_) },  "tagname, '-1'"],
            text  => [sub { $self->text(@_) }, "dtext"],
        ],
        marked_sections => 1,
    );

    $parser->parse($html);

    # parse() treats its argument as one chunk of a possibly longer document,
    # so text at the very end may still be buffered inside the parser.  eof()
    # signals the end of the document and flushes it to the text handler.
    $parser->eof;

    return delete $self->{text};
}
# Parser callback shared by the start- and end-tag handlers.
#
#   $tag - name of the element that was opened or closed
#   $num - '+1' for a start tag, '-1' for an end tag
#
# Keeps a per-element nesting count in $self->{inside} (consulted by text()
# to skip script/style content) and makes sure the accumulated text ends in
# whitespace, so words on either side of a tag do not run together.
sub tag {
    my ($self, $tag, $num) = @_;

    my $depth = ($self->{inside}->{$tag} || 0) + $num;

    # A stray end tag (e.g. "</script>" with no opener) must not push the
    # count below zero: a negative count is true, so text() would discard
    # everything that follows, and the next real start tag would only bring
    # the count back to zero instead of marking the element as open.
    $depth = 0 if $depth < 0;
    $self->{inside}->{$tag} = $depth;

    # The first tag may arrive before any text; give the regex a defined
    # string to match so it does not warn under -w.
    $self->{text} = '' unless defined $self->{text};
    $self->{text} .= ' ' unless $self->{text} =~ /\s\z/;
}
# Parser callback for text events.
#
# Appends the given chunk to $self->{text}, except while the nesting count
# for a script or style element in $self->{inside} is true, in which case
# the chunk is dropped.
sub text {
    my $self  = shift;
    my $chunk = shift;

    for my $element (qw( script style )) {
        return if $self->{inside}->{$element};
    }

    $self->{text} .= $chunk;
}
1;