package Plagger::Plugin::Filter::HTMLTidy;
use strict;
use base qw( Plagger::Plugin );

use HTML::Tidy;

# Hook this plugin into the Plagger context: filter() is registered as the
# handler for the 'update.entry.fixup' hook.
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'update.entry.fixup' => \&filter,
    );
}

# Fallback options for HTML::Tidy->new. filter() uses each of these only
# when the plugin configuration does not already contain the same key.
our %defaults = (
    doctype         => 'omit',
    output_xhtml    => 1,
    wrap            => 0,
    break_before_br => 0,
    input_encoding  => 'utf8',
    output_encoding => 'utf8',
    tidy_mark       => 0,
);

# Hook handler: runs the entry body through HTML::Tidy.
#
# Arguments: ($self, $context, $args), where $args->{entry} is the entry
# being processed. Returns early (bare return) when the entry has no body
# or the body does not report itself as HTML.
sub filter {
    my($self, $context, $args) = @_;

    my $body = $args->{entry}->body;
    return unless $body && $body->is_html;

    # Build the effective option set in a fresh hash: keys present in the
    # plugin configuration win, everything else falls back to %defaults.
    # Merging into a copy keeps the plugin's stored configuration untouched,
    # and the merged hash is what HTML::Tidy actually receives, so the
    # defaults still apply when the plugin has no configuration at all.
    my $conf = { %defaults, %{ $self->conf || {} } };

    my $tidy = HTML::Tidy->new($conf);
    $tidy->ignore( type => TIDY_WARNING );

    my $new_body = $tidy->clean($body->data); # pass in Unicode string, not UTF-8

    # HACK to extract
\s*(.*?)\s*\s*