#!/usr/bin/perl -w =head1 NAME mech-dump - Dumps information about a web page =cut use warnings; use strict; use WWW::Mechanize; use Getopt::Long; use Pod::Usage; my @actions; my $absolute; GetOptions( forms => sub { push( @actions, \&dump_forms ); }, links => sub { push( @actions, \&dump_links ); }, images => sub { push( @actions, \&dump_images ); }, all => sub { push( @actions, \&dump_forms, \&dump_links, \&dump_images ); }, absolute => \$absolute, help => sub { pod2usage(1); }, ) or pod2usage(2); =head1 SYNOPSIS mech-dump [options] [file|url] Options: --forms Dump table of forms (default action) --links Dump table of links --images Dump table of images --all Dump all three of the above, in that order --absolute Show URLs as absolute, even if relative in the page --help Show this message The order of the options specified is relevant. Repeated options get repeated dumps. =cut my $uri = shift or die "Must specify a URL or file to check\n"; if ( -e $uri ) { require URI::file; $uri = URI::file->new_abs( $uri )->as_string; } @actions = (\&dump_forms) unless @actions; my $mech = WWW::Mechanize->new( cookie_jar => undef ); my $response = $mech->get( $uri ); $response->is_success or die "Can't fetch $uri\n", $response->status_line, "\n"; $mech->is_html or die qq{$uri returns type "}, $mech->ct, qq{", not "text/html"\n}; for my $action ( @actions ) { $action->( $mech ); } sub dump_links { my $mech = shift; for my $link ( $mech->links ) { my $url = $absolute ? $link->url_abs : $link->url; print "$url\n"; } return; } sub dump_images { my $mech = shift; for my $image ( $mech->images ) { my $url = $absolute ? $image->url_abs : $image->url; print "$url\n"; } return; } sub dump_forms { my $mech = shift; for my $form ( $mech->forms() ) { print $form->dump; print "\n"; } return; } =head1 TODO =over 4 =item * Options for C<--user>, C<--pass> and C<--proxy>. =back =cut