package Plagger::Plugin::Filter::HTMLScrubber; use strict; use base qw( Plagger::Plugin ); use HTML::Scrubber; sub rules { return( img => { src => qr{^http://}, # only URL with http:// alt => 1, # alt attributes allowed '*' => 0, # deny all others }, style => 0, script => 0, ); } sub default { return( '*' => 1, # default rule, allow all attributes 'href' => qr{^(?!(?:java)?script)}i, 'src' => qr{^(?!(?:java)?script)}i, 'cite' => '(?i-xsm:^(?!(?:java)?script))', 'language' => 0, 'name' => 1, # could be sneaky, but hey ;) 'onblur' => 0, 'onchange' => 0, 'onclick' => 0, 'ondblclick' => 0, 'onerror' => 0, 'onfocus' => 0, 'onkeydown' => 0, 'onkeypress' => 0, 'onkeyup' => 0, 'onload' => 0, 'onmousedown' => 0, 'onmousemove' => 0, 'onmouseout' => 0, 'onmouseover' => 0, 'onmouseup' => 0, 'onreset' => 0, 'onselect' => 0, 'onsubmit' => 0, 'onunload' => 0, 'src' => 0, 'type' => 0, 'style' => 0, ); } sub register { my ( $self, $context ) = @_; $context->register_hook( $self, 'update.entry.fixup' => \&update, 'plugin.init' => \&initialize, ); } sub initialize { my($self, $context, $args) = @_; $self->{scrubber} = do { my $scrubber = HTML::Scrubber->new; my $config = $self->conf; my ( %rules, %default ); unless ( delete $config->{no_default_configs} ) { %rules = $self->rules; %default = $self->default; } $scrubber->rules( %rules, %{ delete $config->{rules} || {} } ); $scrubber->default(1, { %default, %{ delete $config->{default} || {} } }); while ( my ( $method, $arg ) = each %$config ) { eval { $scrubber->$method( ref $arg eq 'ARRAY' ? @$arg : ref $arg eq 'HASH' ? %$arg : $arg ); }; $context->error(qq/Invalid method call "$method": $@/) if $@; } $scrubber; }; } sub update { my ( $self, $context, $args ) = @_; if (defined $args->{entry}->body && $args->{entry}->body->is_html) { $context->log(debug => "Scrubbing body for " . $args->{entry}->permalink || '(no-link)'); my $body = $self->{scrubber}->scrub( $args->{entry}->body ); $args->{entry}->body($body); } } 1; __END__ =head1 NAME Plagger::Plugin::Filter::HTMLScrubber - Scrub feed content =head1 SYNOPSIS - module: Filter::HTMLScrubber config: rules: style: 0 script: 0 =head1 DESCRIPTION This plugin scrubs feed content using L. All config parameters (except 'no_default_configs') are implemented as HTML::Scrubber's method: value. For example, if you write: method: value in the config: section, this plugin will automatically turn the config into the method call: $scrubber->method('value'); See L document for details. =head1 CONFIG =over 4 =item no_default_configs Some rules and default config parameters are set by default. See I and I methods defined in this module code for details. If you don't need these settings, use C no_detault_configs: 1 Defaults to 0, which means it uses the default (somewhat secure) config. =back =head1 AUTHOR Daisuke Murase Tatsuhiko Miyagawa =head1 SEE ALSO L, L =cut