package Plagger::Plugin::Filter::TruePermalink;
use strict;
use base qw( Plagger::Plugin );
use DirHandle;
use YAML;
use Plagger::UserAgent;
use URI;
use URI::QueryParam;
sub init {
my $self = shift;
$self->SUPER::init(@_);
$self->load_plugins;
}
sub load_plugins {
my $self = shift;
my $dir = $self->assets_dir;
my $dh = DirHandle->new($dir) or Plagger->context->error("$dir: $!");
for my $file (grep -f $_->[0] && $_->[1] =~ /\.yaml$/,
map [ File::Spec->catfile($dir, $_), $_ ], sort $dh->read) {
$self->load_plugin(@$file);
}
}
sub load_plugin {
my($self, $file, $base) = @_;
Plagger->context->log(debug => "loading $file");
my $data = YAML::LoadFile($file);
if (ref($data) eq 'ARRAY') {
push @{$self->{redirectors}}, { follow_link => "^(?:" . join("|", @$data) . ")" };
} else {
push @{$self->{plugins}}, $data;
}
}
sub register {
my($self, $context) = @_;
$context->register_hook(
$self,
'update.entry.fixup' => \&update,
);
}
sub update {
my($self, $context, $args) = @_;
$self->rewrite(sub { $args->{entry}->link }, sub { $args->{entry}->link(@_) });
for my $enclosure ($args->{entry}->enclosures) {
$self->rewrite(sub { $enclosure->url }, sub { $enclosure->url( URI->new(@_) ) });
}
}
sub rewrite {
my($self, $getter, $callback) = @_;
my $loop;
while ($self->rewrite_link($getter, $callback)) {
if ($loop++ >= 100) {
Plagger->error("Possible infinite loop on " . $getter->());
}
}
}
sub rewrite_link {
my($self, $getter, $callback) = @_;
my $context = Plagger->context;
my $link = $getter->();
my $orig = $link; # copy
my $count = 0;
my $rewritten;
for my $plugin (@{ $self->{plugins}}) {
my $match = $plugin->{match} || '.'; # anything
next unless $link =~ m/$match/i;
if ($plugin->{rewrite}) {
local $_ = $link;
my $done = eval $plugin->{rewrite};
if ($@) {
$context->error("$@ in $plugin->{rewrite}");
} elsif ($done) {
$count += $done;
$rewritten = $_;
last;
}
} elsif ($plugin->{query_param}) {
my $param = URI->new($link)->query_param($plugin->{query_param})
or $context->error("No query param $plugin->{query_param} in " . $link);
$count++;
$rewritten = $param;
last;
}
}
unless ($count) {
for my $red (@{ $self->{redirectors} }) {
next unless $red->{follow_link};
if ($link =~ /$red->{follow_link}/i) {
my $url = $self->follow_redirect($link);
if ($url && $url ne $link) {
$count++;
$rewritten = $url;
last;
}
}
}
}
if ($count) {
$callback->($rewritten);
$context->log(info => "Link $orig rewritten to $rewritten");
}
return $count;
}
sub follow_redirect {
my($self, $link) = @_;
my $url = $self->cache->get_callback(
"redirector:$link",
sub {
my $ua = Plagger::UserAgent->new;
my $res = $ua->simple_request( HTTP::Request->new(GET => $link) );
if ($res->is_redirect) {
return $res->header('Location');
}
return;
},
'1 day',
);
Plagger->context->log(debug => "Resolving redirection of $link: $url") if $url;
return $url;
}
1;
__END__
=head1 NAME
Plagger::Plugin::Filter::TruePermalink - Normalize permalink using its own plugin files
=head1 SYNOPSIS
- module: Filter::TruePermalink
=head1 DESCRIPTION
This plugin normalizes permalink using YAML based URL pattern
files. Various permalink fix filters in the past (YahooBlogSearch,
Namaan, 2chRSSPermalink) can now be writting as a pattern file for
this plugin.
This plugin rewrites I attribute of C<$entry>, rather than
I. If C<$entry> has enclosures, this plugin also tries to
rewrite url of them.
=head1 PATTERN FILES
You can write your own pattern file using YAML data format. Usable keys are:
=over 4
=item author
Your name. (Optional)
=item match
Regular expression rule to match with entry's link. Rewrites only
happen when the URL form matches. You can omit this configuration to
apply the rewrite rule to any URLs.
=item rewrite
Replacement regexp to filter permalink. Permalink is stored in C<$_> variable so that you can write:
rewrite: s/;jsession_id=\w+//
=item query_param
URL query parameter to extract normalized permalink.
query_param: destination
=back
See C for more examples.
=head1 AUTHOR
youpy
Tatsuhiko Miyagawa
=head1 SEE ALSO
L
=cut