#!/usr/bin/perl -w

package main;

use warnings;
use strict;
use CAM::PDF;
use Getopt::Long;
use Pod::Usage;

our $VERSION = '1.57';

# Command-line switches; all default to "off".
my %opts = (
    verbose => 0,
    help    => 0,
    version => 0,
);

Getopt::Long::Configure('bundling');
GetOptions(
    'v|verbose' => \$opts{verbose},
    'h|help'    => \$opts{help},
    'V|version' => \$opts{version},
) or pod2usage(1);

if ($opts{help}) {
    pod2usage(-exitstatus => 0, -verbose => 2);
}
if ($opts{version}) {
    print "CAM::PDF v$CAM::PDF::VERSION\n";
    exit 0;
}

# Exactly one positional argument is consumed: the PDF to inspect.
if (@ARGV < 1) {
    pod2usage(1);
}

my $file = shift;

my $doc = CAM::PDF->new($file)
    or die "$CAM::PDF::errstr\n";

my $pages   = $doc->numPages();
my $nimages = 0;    # running image counter across all pages

for my $p (1 .. $pages) {
    my $c = $doc->getPageContent($p);

    # Split the content stream so that every "/Name Do" operator is its
    # own element; everything between them is scanned for inline images.
    my @parts = split /(\/[\w]+\s*Do)\b/xms, $c;

    foreach my $part (@parts) {
        if ($part =~ /\A(\/[\w]+)\s*Do\z/xms) {

            # Referenced object: look the name up and report its
            # dimensions and stream length.
            $nimages++;
            my $ref    = $1;
            my $xobj   = $doc->dereference($ref, $p);
            my $objnum = $xobj->{objnum};
            my $im     = $doc->getValue($xobj);

            # Each entry may be stored under its long or abbreviated key,
            # and is resolved through getValue() when present.
            my $length = $im->{Length} || $im->{L} || 0;
            if ($length) {
                $length = $doc->getValue($length);
            }
            my $w = $im->{Width} || $im->{W} || 0;
            if ($w) {
                $w = $doc->getValue($w);
            }
            my $h = $im->{Height} || $im->{H} || 0;
            if ($h) {
                $h = $doc->getValue($h);
            }

            print "Image $nimages page $p, (w,h)=($w,$h), ref $ref = object $objnum, length $length\n";
        }
        else {
            # This code may break if there are legitimate strings "BI",
            # "ID" and "EI" in order in the page (which happened in the
            # PDF reference doc, of course!)

          BI:
            while ($part =~ s/.*?\bBI\b\s*//xms) {

                # s/// returns the number of substitutions, not the
                # captured text, so the image dictionary has to be taken
                # from $1 after a successful substitution.
                my $im;
                if ($part =~ s/\A(.*?)\s*\bEI\b\s*//xms) {
                    $im = $1;
                }
                next BI if (!$im);

                # This may get rid of a fake BI if there is one in the page.
                $im =~ s/\A.*\bBI\b//xms;

                # Easy tests:
                next BI if ($im =~ m/ \A [)] /xms);
                next BI if ($im =~ m/ [(] \z /xms);
                next BI if ($im !~ m/ \bID\b /xms);

                # Make sure that there is an open paren before every close;
                # if not, then the "BI" was part of a string.
                my $test = $im;
                $test =~ s/ \\[()] //gxms;    # get rid of escaped parens for the test
                while ($test =~ s/ \A(.*?) [)] //xms) {
                    my $bit = $1;
                    next BI if ($bit !~ m/ [(] /xms);
                }

                $nimages++;
                my $w = 0;
                my $h = 0;
                if ($im =~ m/ \/W(?:idth)?\s*(\d+) /xms) {
                    $w = $1;
                }
                if ($im =~ m/ \/H(?:eight)?\s*(\d+) /xms) {
                    $h = $1;
                }

                print "Image $nimages page $p, (w,h)=($w,$h), inline\n";
            }
        }
    }
}

__END__

=for stopwords listimages.pl

=head1 NAME

listimages.pl - List all images in a PDF document

=head1 SYNOPSIS

 listimages.pl [options] infile.pdf

 Options:
   -v --verbose        print diagnostic messages
   -h --help           verbose help message
   -V --version        print CAM::PDF version

=head1 DESCRIPTION

Searches the PDF for images and lists them on STDOUT in one of the
following formats:

  Image <n> page <p>, (w,h)=(<w>,<h>), ref <label> = object <objnum>, length <length>
  Image <n> page <p>, (w,h)=(<w>,<h>), inline

=head1 SEE ALSO

CAM::PDF

F<crunchjpgs.pl>

F<extractallimages.pl>

F<extractjpgs.pl>

F<uninlinepdfimages.pl>

=head1 AUTHOR

See L<CAM::PDF>

=cut