package PDF::FromHTML; $PDF::FromHTML::VERSION = '0.12'; use 5.006; use strict; use warnings; BEGIN { foreach my $method ( qw( pdf twig tidy args ) ) { no strict 'refs'; *$method = sub { $#_ ? ($_[0]{$method} = $_[1]) : $_[0]{$method} }; } } use Cwd; use File::Temp; use File::Basename; use PDF::Writer; use PDF::Template; use PDF::FromHTML::Twig; use constant HAS_UNICODE_SUPPORT => ($] >= 5.008); use constant PDF_WRITER_BACKEND => do { local $@; # For Perl 5.6.x, we have to use pdflib PDF::Writer->import('pdflib') unless HAS_UNICODE_SUPPORT(); eval { ref(PDF::Writer->new) } or die( "Please install PDF::API2 (preferred) or pdflib_pl first" ); }; use constant HAS_HTML_TIDY => do { local $@; eval { require HTML::Tidy; 1 } or do { unless ( eval { require XML::Clean; 1 } ) { die( "Please install HTML::Tidy (preferred) or XML::Clean first" ); } 0; # Has XML::Clean but no HTML::Tidy }; }; =head1 NAME PDF::FromHTML - Convert HTML documents to PDF =head1 VERSION This document describes version 0.12 of PDF::FromHTML, released December 10, 2005. =head1 SYNOPSIS my $pdf = PDF::FromHTML->new( encoding => 'utf-8' ); $pdf->load_file('source.html'); $pdf->convert( Font => '/path/to/font.ttf', LineHeight => 10, Landscape => 1, ); $pdf->write_file('target.pdf'); =head1 DESCRIPTION This module transforms HTML into PDF, using an assortment of XML transformations implemented in L. There is also a command-line utility, L, that comes with this distribution. =head1 PUBLIC METHODS =cut sub new { my $class = shift; bless({ twig => PDF::FromHTML::Twig->new, args => { @_ }, }, $class); } sub load_file { my ($self, $file) = @_; $self->{file} = $file; } sub parse_file { my $self = shift; my $file = $self->{file}; my $content = ''; my $dir = Cwd::getcwd(); if (!ref $file) { open my $fh, $file or die $!; chdir File::Basename::dirname($file); $content = do { local $/; <$fh> }; } else { $content = $$file; } my $encoding = ($self->args->{encoding} || 'utf8'); if (HAS_UNICODE_SUPPORT() and $self->args) { require Encode; $content = Encode::decode($encoding, $content, Encode::FB_XMLCREF()); } $content =~ s{ }{}g; $content =~ s{}{}gs; if (HAS_HTML_TIDY()) { if (HAS_UNICODE_SUPPORT()) { $content = Encode::encode( ascii => $content, Encode::FB_XMLCREF()); } $content = HTML::Tidy->new->clean( '', '', $content, ); } else { $content =~ s{&#(\d+);}{chr $1}eg; $content =~ s{&#x([\da-fA-F]+);}{chr hex $1}eg; $content = XML::Clean::clean($content, '1.0', { encoding => 'UTF-8' }); $content =~ s{<(/?\w+)}{<\L$1}g; } $self->twig->parse( $content ); chdir $dir; } =head2 convert(%params) Convert the loaded file to PDF. Valid parameters are: PageWidth 640 PageResolution 540 FontBold 'HelveticaBold' FontOblique 'HelveticaOblique' FontBoldOblique 'HelveticaBoldOblique' LineHeight 12 FontUnicode 'Helvetica' Font (same as FontUnicode) PageSize 'A4' Landscape 0 =cut sub convert { my ($self, %args) = @_; { # import arguments into Twig parameters no strict 'refs'; ${"PDF::FromHTML::Twig::$_"} = $args{$_} foreach keys %args; } $self->parse_file; my ($fh, $filename) = File::Temp::tempfile( SUFFIX => '.xml', UNLINK => 1, ); binmode($fh); if (HAS_UNICODE_SUPPORT()) { binmode($fh, ':utf8'); } # XXX HACK! XXX my $text = $self->twig->sprint; $text =~ s{\$(__[A-Z_]+__)}{}g; print $fh $text; close $fh; # print STDERR "==> Temp file written to $filename\n"; local $^W; $self->pdf(eval { PDF::Template->new( filename => $filename ) }) or die "$filename: $@"; $self->pdf->param(@_); } sub write_file { my $self = shift; local $^W; $self->pdf->write_file(@_); } 1; =head1 HINTS & TIPS =head2 EimgE tags Add the height and width attributes if you are creating the source HTML, it keeps PDF::FromHTML from having to open and read the source image file to get the real size. Less file I/O means faster processing. =head1 SEE ALSO L, L, L. =head1 AUTHORS Audrey Tang Eautrijus@autrijus.orgE =head1 CONTRIBUTORS Charleston Software Associates Einfo@charletonsw.comE =head1 COPYRIGHT Copyright 2004, 2005 by Audrey Tang Eautrijus@autrijus.orgE. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. See L =cut