package HTML::ResolveLink;
use strict;
our $VERSION = '0.02';
use base qw(HTML::Parser);
use Carp;
use HTML::Tagset ();
use URI;
sub new {
my($class, %p) = @_;
my $self = $class->SUPER::new(
start_h => [ \&_start_tag, "self,tagname,attr,attrseq,text" ],
default_h => [ \&_default, "self,tagname,attr,text" ],
);
unless ($p{base}) {
Carp::croak("HTML::ResolveLink->new: base is a required parameter");
}
$p{base} = URI->new($p{base}) unless ref $p{base};
$self->{resolvelink_base} = $p{base};
$self;
}
sub _start_tag {
my($self, $tagname, $attr, $attrseq, $text) = @_;
if ($tagname eq 'base' && defined $attr->{href}) {
$self->{resolvelink_base} = $attr->{href};
}
my $base = $self->{resolvelink_base};
my $links = $HTML::Tagset::linkElements{$tagname} || [];
$links = [$links] unless ref $links;
for my $a (@$links) {
next unless exists $attr->{$a};
my $link = $attr->{$a};
my $uri = URI->new($link);
# relative link:
unless (defined $uri->scheme) {
$uri = $uri->abs($base);
$attr->{$a} = $uri->as_string;
$self->{resolvelink_count}++;
}
}
$self->{resolvelink_html} .= "<$tagname";
for my $a (@$attrseq) {
next if $a eq '/';
$self->{resolvelink_html} .= sprintf qq( %s="%s"), $a, _escape($attr->{$a});
}
$self->{resolvelink_html} .= ' /' if $attr->{'/'};
$self->{resolvelink_html} .= '>';
}
sub _default {
my($self, $tagname, $attr, $text) = @_;
$self->{resolvelink_html} .= $text;
}
my %escape = (
'<' => '<',
'>' => '>',
'"' => '&qout;',
'&' => '&',
);
my $esc_re = join '|', keys %escape;
sub _escape {
my $str = shift;
$str =~ s/($esc_re)/$escape{$1}/g;
$str;
}
sub resolve {
my($self, $html) = @_;
# init
$self->{resolvelink_html} = '';
$self->{resolvelink_count} = 0;
$self->parse($html);
$self->eof;
$self->{resolvelink_html};
}
sub resolved_count {
my $self = shift;
$self->{resolvelink_count};
}
1;
__END__
=head1 NAME
HTML::ResolveLink - Resolve relative links in (X)HTML into absolute URI
=head1 SYNOPSIS
use HTML::ResolveLink;
my $resolver = HTML::ResolveLink->new(
base => 'http://www.example.com/foo/bar.html',
);
$html = $resolver->resolve($html);
if ($resolver->resolved_count) {
...
}
=head1 DESCRIPTION
HTML::ResolveLink is a module to rewrite relative links in XHTML or
HTML into absolute URI.
For example. when you have
foo
and use C as C URL, you'll get:
foo
If the parser encounters C<< >> tag in HTML, it'll honor that.
=head1 METHODS
=over 4
=item new
my $resolver = HTML::ResolveLink->new(
base => 'http://www.example.com/',
);
C is a required parameter, which is used to resolve the relative
URI found in the document.
=item resolve
$html = $resolver->resolve($html);
Resolves relative URI found in C<$html> into absolute and returns a
string containing rewritten one.
=item resolved_count
$count = $resolver->resolved_count;
Returns how many URIs are resolved during the previous I
method call. This should be called after the I, otherwise
returns undef.
=head1 AUTHOR
Tatsuhiko Miyagawa Emiyagawa@bulknews.netE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 SEE ALSO
L, L
=cut