package HTML::Obliterate;
use strict;
use warnings;
our $VERSION = '0.3';
use base 'Exporter';
# Nothing is exported by default.  Every public function -- the two real
# implementations followed by each alias of remove_html() -- may be imported
# by name.
our @EXPORT_OK = (
    'remove_html',
    'remove_html_from_string',
    'obliterate_html',
    'kill_html',
    'erase_html',
    'extinguish_html',
    'extirpate_html',
    'abolish_html',
    'doff_html',
    'eliminate_html',
    'drop_html',
    'purge_html',
    'strip_html',
    'destroy_html',
    'defenestrate_html',
);
# Strip HTML from one string, or from every string in an array reference.
#
#   * Anything that is not a plain ARRAY reference is handed straight to
#     remove_html_from_string() and that call's result is returned.
#   * ARRAY reference, void context: every element of the caller's array is
#     overwritten with its cleaned version.
#   * ARRAY reference, any other context: the caller's array is left alone
#     and a reference to a new array of cleaned copies is returned.
sub remove_html {
    my ($item) = @_;

    return remove_html_from_string($item) if ref $item ne 'ARRAY';

    my $last_index = $#{$item};

    if ( defined wantarray ) {

        # The caller wants a value back: build a cleaned copy, index by index.
        my @cleaned;
        for my $idx ( 0 .. $last_index ) {
            $cleaned[$idx] = remove_html_from_string( $item->[$idx] );
        }
        return \@cleaned;
    }

    # Void context: nobody is looking at a return value, so clean in place.
    for my $idx ( 0 .. $last_index ) {
        $item->[$idx] = remove_html_from_string( $item->[$idx] );
    }
    return $item;
}
# Remove HTML comments, tags and entities from a single string.
#
# Takes one string and returns a cleaned copy; the caller's variable is not
# modified.  An undefined argument is returned as-is (undef) rather than
# being pushed through the substitutions, which would only produce
# "uninitialized value" warnings.
sub remove_html_from_string {
    my ($string) = @_;
    return $string if !defined $string;

    # Comments first, and every one of them (/g): a comment may itself
    # contain markup, and removing it whole keeps the tag pattern below from
    # half-matching that markup and leaking the commented-out text.
    $string =~ s{ < \s* [!] \s* [-] \s* [-] \s* .*? [-] \s* [-] \s* > }{}xmsg;

    # Tags.  The trailing group also consumes any text that runs up to a
    # further '>' with no '<' in between (e.g. a '>' inside an attribute).
    $string =~ s{ < \W* [^>]* > (?: [^<]* >)* }{}xmsg;

    # Entities: '&', optional '#', word characters, optional closing ';'.
    $string =~ s{ [&][#]? \w+ [;]? }{}xmsg;

    return $string;
}
# Alternative names for remove_html().  Each alias is a one-line trampoline:
# it uses goto to replace its own call with a call to remove_html() on the
# current @_, so remove_html() runs as though it had been called directly.
for my $alias (
    qw(
    obliterate_html kill_html
    erase_html      extinguish_html
    extirpate_html  abolish_html
    doff_html       eliminate_html
    drop_html       purge_html
    strip_html      destroy_html
    defenestrate_html
    )
  )
{
    no strict 'refs';    # symbolic glob access is needed to install a sub by name
    *{ __PACKAGE__ . q{::} . $alias } = sub { goto &HTML::Obliterate::remove_html };
}
1;
__END__
=head1 NAME
HTML::Obliterate - Perl extension to remove HTML from a string or arrayref of strings.
=head1 SYNOPSIS
use HTML::Obliterate qw(extirpate_html);
my $html_less_version_of_string = extirpate_html( $html_code_string );
=head1 DESCRIPTION
Removes HTML tags and entities from a string, efficiently and reliably.
=head2 EXPORT
None by default. Any of the functions documented below can be imported by name.
=head1 FUNCTIONS
=head2 remove_html_from_string()
Takes a string, removes all HTML tags and entities, and returns the HTML-free version.
=head2 remove_html()
Same as remove_html_from_string() except you can also pass it an array ref of strings to have their HTML removed.
In void context it will modify the array passed:
my @html = ...
remove_html(\@html);
# every item in @html now does not have any HTML tags in it
Otherwise it returns an array ref to a new array:
my @html = ...
my $html_free = remove_html(\@html);
# @html is still the same strings, including any HTML tags
# $html_free is an array ref of an array that is a copy of @html except without any HTML tags
=head2 not the same boring code
Tired of the same old thing? Surprise your wife, impress your boss, and
amaze your friends with these wild, wacky, alternative aliases to remove_html():
=over 4
=item obliterate_html
=item kill_html
=item erase_html
=item extinguish_html
=item extirpate_html
My favorite just FYI ;)
=item abolish_html
=item doff_html
My second favorite...
=item eliminate_html
=item drop_html
=item purge_html
=item strip_html
=item destroy_html
=item defenestrate_html
A new favorite
=back
=head1 AUTHOR
Daniel Muey, L
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2006 by Daniel Muey
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.6 or,
at your option, any later version of Perl 5 you may have available.
=cut