package HTML::Mail;
our $VERSION = '0.11';
$VERSION = eval $VERSION; # see L
# Preloaded methods go here.
use LWP::UserAgent;
require URI;
require HTML::Parser;
require MIME::Lite;
use Carp qw(carp croak);
use strict;
use warnings;
our @ISA = qw(HTML::Parser);
our $SIMPLE_CID;
our $AUTOLOAD;
#see if Data::UUID if present and use cid generation possible
eval{
require Data::UUID;
};
if($@){
#UUID not present, use fallback cid generation (should be OK)
*gen_cid =
sub{
my ($self, $uri) = @_;
return time."_${$}_".(int rand(100_000)).'_'.$self->{'cid'}++;
};
}else{
#UUID present, cids will be universally unique
my $UUID = Data::UUID->new();
*gen_cid =
sub{
my ($self, $uri) = @_;
return $UUID->to_string($UUID->create);
};
}
sub new {
my ($package, %params) = @_;
my $self = bless {}, $package;
$self->{'_original_params'} = {};
$self->{'_cache'} = {};
$self->build(%params);
return $self;
}
#Default LWP::UserAgent
sub _set_default_lwp_ua {
my $self = shift;
$self->{'_ua'} ||= LWP::UserAgent->new(
'agent' => 'HTML::Mail',
'timeout' => 60,
);
return $self;
}
sub build {
my ($self, %params) = @_;
$self->_reset_html;
%params = (%{$self->{'_original_params'}}, %params);
if (exists($params{'HTML'})) {
$self->{'HTML'} = $params{'HTML'};
}else {
croak "No HTML parameter send";
}
if (exists($params{'Text'})) {
$self->{'Text'} = $params{'Text'};
}
if ($params{'lwp_ua'}) {
if($params{'lwp_ua'}->isa('LWP::UserAgent')){
$self->{'_ua'} = $params{'lwp_ua'};
}else{
carp "lwp_ua attribute is not a LWP::UserAgent. Using default.";
}
}
for my $key qw(inline_css strict_download) {
$self->{$key} = exists($params{$key}) ? $params{$key} : 1;
}
#by default don't attach anything linked
$self->{'attach_uri'} = sub {return 1;};
if(exists($params{'attach_uri'})){
if(ref($params{'attach_uri'}) eq 'CODE'){
$self->{'attach_uri'} = $params{'attach_uri'};
}else{
carp "attach_uri specified but not a subroutine reference. Ignoring and using default.";
}
}
#by default don't attach anything linked
$self->{'attach_links'} = sub {return 0;};
if(exists($params{'attach_links'})){
if(ref($params{'attach_links'}) eq 'CODE'){
$self->{'attach_links'} = $params{'attach_links'};
}else{
carp "attach_links specified but not a subroutine reference. Ignoring and using default.";
}
}
$self->{'html_charset'} = $params{'html_charset'} || 'iso-8859-15';
$self->{'text_charset'} = $params{'html_charset'} || 'iso-8859-15';
$self->{'_original_params'} = \%params;
$self->_set_default_lwp_ua();
$self->{'_message'} = MIME::Lite->new(
%params,
Type => $self->{'Text'} ?
'multipart/alternative' :
'multipart/related',
);
$self->_parse_html;
if($self->{'Text'}) {
$self->_attach_text;
}
$self->_attach_media;
return $self;
}
sub _parse_html {
my $self = shift;
#set up the HTML parser
$self->init(
api_version => 3,
start_h => [\&_tag_start, 'self, tag, attr, attrseq'],
end_h => [\&_tag_end, 'self, tag, attr, attrseq'],
text_h => [\&_tag_end, 'self, text'],
);
my $content = $self->{'_cache'}->{$self->{'HTML'}};
if(defined($content)){
$self->parse($content);
return $self;
}
my $response;
eval { $response = $self->_get($self->{'HTML'}, 1, 1); };
if ($@ or not ($response and $response->is_success)) {
delete($self->{'_html_base'});
if ($self->{'HTML'} =~ /<\s*html.*>/i) {
#HTML is the content itself
$self->parse($self->{'HTML'});
}else {
#couldn't get HTML so can't do anything
die $@;
}
}
else {
$self->{'_html_base'} = $response->base();
$self->{'_cache'}->{$self->{'HTML'}} = $response->content;
$self->parse($response->content);
}
return $self;
}
#Return LWP::UserAgent used to make requests
#Allows user to fine tune options
sub lwp_ua{
my ($self, $ua) = @_;
if (ref($ua) && $ua->isa('LWP::UserAgent')){
$self->{'_ua'} = $ua;
}
return $self->{'_ua'};
}
#default behaviour: attach all media to email
sub attach_uri{
my ($self, $url) = @_;
return $self->{'attach_uri'}->($url);
}
#Makes a GET request and returns the response
sub _get {
my ($self, $uri, $nowarn, $die) = @_;
if (!$self || !$self->{'_ua'}) {
die "User agent not defined";
}
if (!$uri) {
die "uri not defined";
}
my $response = $self->{'_ua'}->get($uri);
if ($response->is_success) {
return $response;
}else{
my $uri2 = $response->request->uri;
my $error = "Error while making request [GET ". $uri. ($uri eq $uri2 ? "]" : " -> [$uri2]")."\n". $response->status_line;
if( $self->{'strict_download'} or $die){
die $error;
}else{
unless( $nowarn ){
carp $error;
}
#undef by default
return;
}
}
}
sub _add_html {
my ($self, $tag, $attr, $attrseq) = @_;
my $content = \$self->{'html_content'};
if ($#_ == 1) {
$$content .= $tag; #actually just text
return;
}
#special treatment for tags that end with /
my $empty;
if($attr->{'/'} && $attr->{'/'} eq '/' && $attrseq){
pop @$attrseq;
$empty = 1;
}
$$content .= "<$tag";
if ($attrseq && @$attrseq) {
$$content .= qq/ $_="$attr->{$_}"/ for (@$attrseq);
}
else {
while (my ($k, $v) = each(%$attr)) {
$$content .= qq/ $k="$v"/;
}
}
$$content .= " /" if $empty;
$$content .= ">";
return $self;
}
sub _get_html {
return shift->{'html_content'};
}
sub _create_uri {
my $self = shift;
defined($_[0]) or die "need a link to create a uri";
my $base = $self->{'_html_base'};
if(defined($base)){
return URI->new_abs($_[0], $base);
}else{
return URI->new($_[0]);
}
}
sub _add_link {
my ($self, $uri) = @_;
if(!exists($self->{'links'}->{$uri})){
my $cid = ($SIMPLE_CID ? $self->{'cid'}++: $self->gen_cid($uri));
$self->_get_media($uri, $cid);
}
if ( exists( $self->{'links'}->{$uri} ) ) {
return $self->{'links'}->{$uri}->[0];
}
else {
return;
}
}
sub _get_inline_content {
my $self = shift;
my $uri = $self->_create_uri ($_[0]);
my $response = $self->_get($uri);
if( defined $response ){
return $self->{'inline_links'}->{$uri} ||= $self->_get($uri)->content;
}else{
return '';
}
}
sub _get_links {
return shift->{'links'} || {};;
}
sub _reset_links {
my $self = shift;
$self->{'links'} = {};
$self->{'inline_links'} = {};
$self->{'cid'} = 0;
return $self;
}
sub _reset_html {
shift->{'html_content'} = '';
}
sub _tag_start {
my $self = shift;
my ($tag, $attr, $attrseq) = @_;
if ($tag eq 'base' and not $self->{'_html_base'}) {
$self->{'_html_base'} = $attr->{'href'};
}elsif (
($tag eq 'link') &&
($attr->{'rel'} &&
$attr->{'rel'} eq 'stylesheet') &&
exists($attr->{'href'})
){
if($self->{'inline_css'}){
return $self->_add_inline_content(@_);
}else{
$self->_tag_filter_link($attr, 'href');
}
}elsif($tag eq 'a' and defined($attr->{'href'})){
$attr->{'href'} = $self->_create_uri($attr->{'href'});
}
$self->_tag_filter_link($attr, 'background');
$self->_tag_filter_link($attr, 'src') if ($tag ne 'script');
#selective attach of linked media
if(defined($attr->{'href'})){
if($self->{'attach_links'}->($self->_create_uri($attr->{'href'}))){
$self->_tag_filter_link($attr, 'href');
}
}
$self->_add_html(@_);
return $self;
}
sub _add_inline_content{
my $self = shift;
my ($tag, $attr) = @_;
my $link = $attr->{'href'};
my $content = $self->_get_inline_content($link);
#try to make easy to generalise in the future
#javascript and other things may one day be inlined
if($tag eq 'link' && $attr->{'rel'} eq 'stylesheet'){
$tag = 'style';
delete($attr->{'href'});
}
$self->_add_html($tag, $attr);
$self->{'html_content'} .= "\n $content \n$tag>";
return $self;
}
sub _tag_filter_link {
my ($self, $attrs, $attr) = @_;
if (exists($attrs->{$attr})) {
my $uri = $self->_create_uri ($attrs->{$attr});
if($self->attach_uri($uri)){
my $cid = $self->_add_link($uri);
if(defined $cid){
$attrs->{$attr} = "cid:" . $cid;
}else{
#just remove content
$attrs->{$attr} = '';
}
}else{
#place absolute url just in case
$attrs->{$attr} = $uri->as_string;
}
}
return $self;
}
sub _tag_end {
shift->_add_html(@_);
}
sub _tag_text {
shift->_add_html(@_);
}
sub _attach_media {
my $self = shift;
my $related;
if($self->{'Text'}) {
$related = MIME::Lite->new(
'Type' => 'multipart/related',
'Datestamp' => undef,
'Disposition' => 'inline',
);
}else{
$related = $self->{'_message'};
}
my $html_part = MIME::Lite->new(
'Type' => 'text/html',
'Encoding' => 'quoted-printable',
'Data' => $self->_get_html,
'Disposition' => 'inline',
'Datestamp' => undef,
);
$html_part->attr('content-type.charset' => $self->{'html_charset'});
#attach the html part
$related->attach($html_part);
if($SIMPLE_CID){
#needs to be sorted in order to run the build tests
#otherwise the order depends on the hashing function and threrefore on perl's version
#TODO beter tests
my %links = %{ $self->_get_links };
for (sort keys %links) {
$related->attach($links{$_}->[1]);
}
}else{
while ( my ( $link, $media ) = each( %{ $self->_get_links } ) ) {
$related->attach( $media->[1] );
}
}
if($self->{'Text'}){
$self->{'_message'}->attach($related);
}
return $self;
}
sub _get_media {
my ( $self, $uri, $cid ) = @_;
my $response = $self->_get($uri);
if ( $response
and $response->can('content')
and $response->can('content_type') )
{
my $part = MIME::Lite->new(
'Encoding' => 'base64',
'Disposition' => 'attachment',
'Data' => $response->content,
'Datestamp' => undef,
);
$part->attr( 'Content-type' => $response->content_type );
$part->attr( 'Content-ID' => "<$cid>" );
$self->{'links'}->{$uri} = [ $cid, $part ];
}
return $self;
}
sub _attach_text {
my $self = shift;
my $text = $self->{'Text'};
my $content = $self->{'_cache'}->{$text};
if(!defined($content)){
eval { $content = $self->_get($text, 1, 1)->content; };
if(not $content or $@){
$content = $text;
}
$self->{'_cache'}->{$text} = $content;
}
my $text_part = new MIME::Lite(
'Type' => 'TEXT',
'Encoding' => 'quoted-printable',
'Disposition' => 'inline',
'Data' => $content,
'Datestamp' => undef,
);
$text_part->attr('content-type.charset' => $self->{'text_charset'});
$self->{'_message'}->attach($text_part);
return $self;
}
sub dump {
require Data::Dumper;
my $self = shift;
return Data::Dumper->Dump([$self], [qw(html_mail_dump)]);
}
sub dump_file {
my ($self, $fname) = @_;
my $file;
open $file, ">$fname" or croak "Error openning file $fname for writting.\n$!";
print $file $self->dump;
return $self;
}
sub restore {
my (undef, $data) = @_;
my $html_mail_dump;
eval "$data";
return $html_mail_dump;
}
sub restore_file {
my ($package, $fname) = @_;
my $file;
open $file, "<$fname" or croak "Error openning file $fname for reading.\n$!";
{
local $/;
my $data = <$file>;
return $package->restore($data);
}
return;
}
sub AUTOLOAD {
my $self = shift;
$AUTOLOAD =~ s/.*:://;
if($self->{'_message'} && $self->{'_message'}->can($AUTOLOAD)){
return $self->{'_message'}->$AUTOLOAD(@_);
}
}
1;
__END__
# Below is stub documentation for your module. You'd better edit it!
=encoding utf8
=head1 NAME
HTML::Mail - Perl extension for sending emails with embedded HTML and media
=head1 SYNOPSIS
use HTML::Mail;
### initialisation
my $html_mail = HTML::Mail->new(
HTML => 'http://www.cpan.org',
Text => 'This is the text representation of the webpage http://www.cpan.org',
From => 'me@myhost.org',
To => 'you@yourhost.org',
Subject => 'CPAN webpage');
### Send the email ("inherited" from MIME::Lite)
$html_mail->send();
#### Remove text representation
$html_mail->set_Text();
### Rebuild the message and send
$html_mail->build->send;
### Serialise to file for later reuse
$html_mail->dump_file('/tmp/cpan_mail.data');
### Restore from file
my $restored = HTML::Mail->restore_file('/tmp/cpan_mail.data');
=head1 DESCRIPTION
B is supposed to help with the task of sending emails with HTML and images (or other media) embedded or externally linked.
It uses L for all MIME related jobs, L to find related files and change the URIs and L to retrieve the related files.
Email can be 'multipart/alternative' if both HTML and Text content exist and 'multipart/related' if there is only HTML content.
If all you want is to send text-only email, you probably won't find this module useful at all, or at best a huge overkill.
=head2 Method Summary
=over 4
=item new
Constructor.
Initialises the object.
See the L section
=item build
Regenerates the email. Allows you to change any attributes as in the constructor.
Main difference with C is that it doesn't fetch content that was previously fetched/parsed.
=item lwp_ua
Returns the L object used internally so that it can the customized
=item dump
Serializes the object to a string
=item dump_file
Serializes the object to a file
=item restore
Restores previously serialized object from a string
=item restore_file
Restores previously serialized object from a file
=item gen_cid
Method to generate cids.
Receives $self and the uri to associate the cid to.
If you need to generate your own cids (say, add www.host.com) you should subclass this method.
=back
=head2 Attributes
All attributes are B.
my $html_mail = HTML::Mail->new(attribute => value);
$html_mail->build(attribute => value);
Constructor supports these attributes:
=over 4
=item HTML [URI or STRING]
The URL of HTML data to send in email.
Most common URLs are either F or F
If you prefer, you can use it to specify the actual HTML data as a string
HTML=>'Welcome to HTML::Mail
';
=item Text [URI or STRING]
The URL of Text data to send in email. Similar to the HTML attribute. You can also specify the actual text data as a string.
=item From, To, Subject
Inherited from L. Sender, Recipient and Subject of the message.
=item html_charset
Charset of the HTML part of the email. Defaults to I.
=item text_charset
Charset of the text part of the email. Defaults to I.
=item lwp_ua
L object used to retrieve documents.
The default agent has a 60 second timeout and sends I as the agent.
See also L.
=item inline_css
A true value specifies that when the HTML uses external css this content be placed in the