package HTML::LinkChanger; # Version: $Id: LinkChanger.pm 4 2007-10-05 15:51:37Z sergey.chernyshev $ use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); require Exporter; require AutoLoader; @ISA = qw(Exporter AutoLoader HTML::Parser); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. @EXPORT = qw( ); $VERSION = sprintf("2.%d", q$Rev: 4 $ =~ /(\d+)/); # Preloaded methods go here. # Autoload methods go after =cut, and are processed by the autosplit program. require HTML::Parser; require HTML::Tagset; sub new { my $class = shift; my %args = @_; my $url_filters = $args{'url_filters'}; # reference to array HTML::LinkChanger::URLFilter objects # or reference to one such object my $self = $class->SUPER::new( api_version => 3, default_h => [sub { my $self = shift; $self->{_filtered_html} .= shift }, 'self,text'], start_h => ['link_tag_start', 'self,tagname,text,attr,attrseq'], ); # initializing transforming functions array if (ref($url_filters) eq 'ARRAY') { foreach (@{$url_filters}) { die "Array must contain only HTML::LinkChanger::URLFilter objects" unless UNIVERSAL::isa($_, 'HTML::LinkChanger::URLFilter'); } $self->{url_filters} = $url_filters; } elsif (UNIVERSAL::isa($url_filters, 'HTML::LinkChanger::URLFilter')) { $self->{url_filters} = [$url_filters]; } else { $self->{url_filters} = []; # empty array - can add more filters later } $self; } sub link_tag_start { my($self, $tag, $text, $attr, $attrseq) = @_; my $link_attrs = $HTML::Tagset::linkElements{$tag}; if ($link_attrs) { $link_attrs = [$link_attrs] unless ref $link_attrs; for my $link_attr (@$link_attrs) { next unless exists $attr->{$link_attr}; $attr->{$link_attr} = $self->change_url( $attr->{$link_attr}, $tag, $link_attr ); } my $output='<'.$tag; foreach my $attribute (@$attrseq) { $output.=' '.$attribute.'="'.$attr->{$attribute}.'"'; } $output.='>'; $self->{_filtered_html} .= $output; } else { $self->{_filtered_html} .= $text; } } sub filter { my $self = shift; delete $self->{_filtered_html}; $self->parse(@_); $self->eof; return $self->{_filtered_html}; } sub filter_file { my $self = shift; delete $self->{_filtered_html}; $self->parse_file(@_); $self->eof; return $self->{_filtered_html}; } sub filtered_html { my $self = shift; return $self->{_filtered_html}; } sub change_url { my $self = shift; my $url = shift; # url of the link my $tag = shift; # tag containing a link to change my $attr = shift; # attribute containing a link to change foreach my $filter (@{$self->{url_filters}}) { $url = $filter->url_filter( url => $url, tag => $tag, attr => $attr ); } return $url; # abstract class just keeps everything as it is } 1; __END__ =head1 NAME HTML::LinkChanger - abstract Perl class to change all linking URLs in HTML. =head1 SYNOPSIS BEGIN { package Http2Ftp; require HTML::LinkChanger; use vars qw(@ISA); @ISA = qw(HTML::LinkChanger); # # Converting http URLs to FTP urls # sub change_url { my $self = shift; my $url = shift; $url=~s/^http:/ftp:/; return $url; } } my $http2ftp = new Http2Ftp(); my $converted_HTML = $http2ftp->filter($original_HTML); =head1 DESCRIPTION HTML::LinkChanger is an abstract class so you need to subclass it to make it do something. See HTML::LinkChanger::Absolutizer for one useful example of such class. HTML::LinkChanger uses HTML::Tagset::linkElements to change all attributes that contain links that needs to be updated. This class is a subclass of HTML::Parser. You can call filter() method to convert scalar containing HTML or filter_file() method to convert HTML from file. You can also call conventional HTML::Parser's parse() and parse_file() methods and call filtered_html() after that to retreive results. =head1 AUTHOR Sergey Chernyshev =head1 SEE ALSO HTML::Parser, HTML::Tagset, HTML::LinkChanger::Absolutizer =cut