package WWW::Mechanize::Shell; use strict; use Carp; use WWW::Mechanize; use WWW::Mechanize::FormFiller; use HTTP::Cookies; use base qw( Term::Shell Exporter ); use FindBin; use File::Temp qw(tempfile); use URI::URL; use Hook::LexWrap; use HTML::Display qw(); use HTML::TokeParser::Simple; use B::Deparse; use vars qw( $VERSION @EXPORT %munge_map ); $VERSION = '0.46'; @EXPORT = qw( &shell ); =head1 NAME WWW::Mechanize::Shell - An interactive shell for WWW::Mechanize =head1 SYNOPSIS From the command line as perl -MWWW::Mechanize::Shell -eshell or alternatively as a custom shell program via : =for example begin #!/usr/bin/perl -w use strict; use WWW::Mechanize::Shell; my $shell = WWW::Mechanize::Shell->new("shell"); if (@ARGV) { $shell->source_file( @ARGV ); } else { $shell->cmdloop; }; =for example end =for example_testing BEGIN { require WWW::Mechanize::Shell; $ENV{PERL_RL} = 0; $ENV{COLUMNS} = '80'; $ENV{LINES} = '24'; }; BEGIN { no warnings 'once'; no warnings 'redefine'; *WWW::Mechanize::Shell::cmdloop = sub {}; *WWW::Mechanize::Shell::display_user_warning = sub {}; *WWW::Mechanize::Shell::source_file = sub {}; }; isa_ok( $shell, "WWW::Mechanize::Shell" ); =head1 DESCRIPTION This module implements a www-like shell above WWW::Mechanize and also has the capability to output crude Perl code that recreates the recorded session. Its main use is as an interactive starting point for automating a session through WWW::Mechanize. The cookie support is there, but no cookies are read from your existing browser sessions. See L on how to implement reading/writing your current browsers cookies. =head2 Cnew %ARGS> This is the constructor for a new shell instance. Some of the options can be passed to the constructor as parameters. By default, a file C<.mechanizerc> (respectively C under Windows) in the users home directory is executed before the interactive shell loop is entered. This can be used to set some defaults. If you want to supply a different filename for the rcfile, the C parameter can be passed to the constructor : rcfile => '.myapprc', =cut sub init { my ($self) = @_; my ($name,%args) = @{$self->{API}{args}}; $self->{agent} = WWW::Mechanize->new(); $self->{formfiller} = WWW::Mechanize::FormFiller->new(default => [ Ask => $self ]); $self->{history} = []; $self->{options} = { autosync => 0, warnings => (exists $args{warnings} ? $args{warnings} : 1), autorestart => 0, watchfiles => (exists $args{watchfiles} ? $args{watchfiles} : 1), cookiefile => 'cookies.txt', dumprequests => 0, dumpresponses => 0, verbose => 0, }; # Install the request dumper : $self->{request_wrapper} = wrap *LWP::UserAgent::request, pre => sub { $self->request_dumper($_[1]) if $self->option("dumprequests"); }, post => sub { $self->response_dumper($_[-1]) if $self->option("dumpresponses"); }; $self->{redirect_ok_wrapper} = wrap 'WWW::Mechanize::redirect_ok', post => sub { return unless $_[1]; $self->status( "\nRedirecting to ".$_[1]->uri."\n" ); $_[-1] }; # Load the proxy settings from the environment $self->agent->env_proxy(); # Read our .rc file : # I could use File::Homedir, but the docs claim it dosen't work on Win32. Maybe # I should just release a patch for File::Homedir then... Not now. my $sourcefile; if (exists $args{rcfile}) { $sourcefile = delete $args{rcfile}; } else { my $userhome = $^O =~ /win32/i ? $ENV{'USERPROFILE'} || $ENV{'HOME'} : ((getpwuid($<))[7]); $sourcefile = "$userhome/.mechanizerc" if -f "$userhome/.mechanizerc"; }; $self->option('cookiefile', $args{cookiefile}) if (exists $args{cookiefile}); $self->source_file($sourcefile) if defined $sourcefile; $self->{browser} = undef; # Keep track of the files we consist of, to enable automatic reloading $self->{files} = undef; if ($self->option('watchfiles')) { eval { my @files = grep { -f && -r && $_ ne '-e' } values %INC; local $, = ","; require File::Modified; $self->{files} = File::Modified->new(files=>[@files]); }; $self->display_user_warning( "Module File::Modified not found. Automatic reloading disabled.\n" ) if ($@); }; }; =head2 C<$shell-Erelease_agent> Since the shell stores a reference back to itself within the WWW::Mechanize instance, it is necessary to break this circular reference. This method does this. =cut sub release_agent { my ($self) = @_; undef $self->{request_wrapper}; $self->{agent} = undef; }; =head2 C<$shell-Esource_file FILENAME> The C method executes the lines of FILENAME as if they were typed in. $shell->source_file( $filename ); =cut sub source_file { my ($self,$filename) = @_; local $_; # just to be on the safe side that we don't clobber outside users of $_ local *F; open F, "< $filename" or die "Couldn't open '$filename' : $!\n"; while () { $self->cmd($_); warn "cmd: $_" if $self->{options}->{verbose}; }; close F; }; sub add_history { my ($self,@code) = @_; push @{$self->{history}},[$self->line,join "",@code]; }; =head2 C<$shell-Edisplay_user_warning> All user warnings are routed through this routine so they can be rerouted / disabled easily. =cut sub display_user_warning { my ($self,@message) = @_; warn @message if $self->option('warnings'); }; =head2 C<$shell-Eprint_paged LIST> Prints the text in LIST using C<$ENV{PAGER}>. If C<$ENV{PAGER}> is empty, prints directly to C. Most of this routine comes from the C utility. =cut sub print_paged { my $self = shift; if ($ENV{PAGER} and -t STDOUT) { my ($fh,$filename) = tempfile(); print $fh $_ for @_; close $fh; my @pagers = ($ENV{PAGER},qq{"$^X" -p}); foreach my $pager (@pagers) { if ($^O eq 'VMS') { last if system("$pager $filename") == 0; # quoting prevents logical expansion } else { last if system(qq{$pager "$filename"}) == 0; } }; unlink $filename or $self->display_user_warning("Couldn't unlink tempfile $filename : $!\n"); } else { print $_ for @_; }; }; sub agent { $_[0]->{agent}; }; sub option { my ($self,$option,$value) = @_; if (exists $self->{options}->{$option}) { my $result = $self->{options}->{$option}; if (scalar @_ == 3) { $self->{options}->{$option} = $value; }; $result; } else { Carp::carp "Unknown option '$option'"; undef; }; }; sub restart_shell { if ($0 ne '-e') { print "Restarting $0\n"; exec $^X, $0, @ARGV; }; }; sub precmd { my $self = shift @_; # We want to restart when any module was changed if ($self->{files} and $self->{files}->changed()) { print "One or more of the base files were changed\n"; $self->restart_shell if ($self->option('autorestart')); }; $self->SUPER::precmd(@_); }; sub browser { my ($self) = @_; $self->{browser} ||= HTML::Display->new(); $self->{browser}; }; sub sync_browser { my ($self) = @_; # We only can display html if we have any : return unless $self->agent->res; # Prepare the HTML for local display : my $unclean = $self->agent->res->content; my $html = ''; # ugly fix: # strip all target='_blank' attributes from the HTML: my $p = HTML::TokeParser::Simple->new(\$unclean); while (my $token = $p->get_token) { $token->delete_attr('target') if $token->is_start_tag; $html .= $token->as_is; }; my $location = $self->agent->{uri}; my $browser = $self->browser; $browser->display( html => $html, location => $location ); }; sub prompt_str { my $self = shift; if ($self->agent->response) { return ($self->agent->uri || "") . ">" } else { return "(no url)>" }; }; sub request_dumper { print $_[1]->as_string }; sub response_dumper { print $_[1]->as_string }; sub re_or_string { my ($self,$arg) = @_; if ($arg =~ m!^/(.*)/([imsx]*)$!) { my ($re,$mode) = ($1,$2); $re =~ s!([^\\])/!$1\\/!g; $arg = eval "qr/$re/$mode"; }; $arg; }; =head2 C<< $shell->link_text LINK >> Returns a meaningful text from a WWW::Mechanize::Link object. This is (in order of precedence) : $link->text $link->name $link->url =cut sub link_text { my ($self,$link) = @_; my $result; for (qw( text name url )) { $result = $link->$_ and last; }; $result; }; =head2 C<$shell-Ehistory> Returns the (relevant) shell history, that is, all commands that were not solely for the information of the user. The lines are returned as a list. print join "\n", $shell->history; =cut sub history { my ($self) = @_; map { $_->[0] } @{$self->{history}} }; =head2 C<$shell-Escript> Returns the shell history as a Perl program. The lines are returned as a list. The lines do not have a one-by-one correspondence to the lines in the history. print join "\n", $shell->script; =cut sub script { my ($self,$prefix) = @_; $prefix ||= ""; my @result = sprintf <<'HEADER', $^X; #!%s -w use strict; use WWW::Mechanize; use WWW::Mechanize::FormFiller; use URI::URL; my $agent = WWW::Mechanize->new( autocheck => 1 ); my $formfiller = WWW::Mechanize::FormFiller->new(); $agent->env_proxy(); HEADER push @result, map { my $x = $_->[1]; $x =~ s/^/$prefix/mg; $x } @{$self->{history}}; @result; }; =head2 C<$shell-Estatus> C is called for status updates. =cut sub status { my $self = shift; print join "", @_; }; =head2 C<$shell-Edisplay FILENAME LINES> C is called to output listings, currently from the C and C HTML::Form will not know about this and will not have provided a submit button for you (understandably). If you want to create such a submit button from within your automation script, use the following code : $agent->current_form->push_input( submit => { name => "submit", value =>"submit" } ); This also works for other dynamically generated input fields. To fake an input field from within a shell session, use the C command : eval $self->agent->current_form->push_input(submit=>{name=>"submit",value=>"submit"}); And yes, the generated script should do the Right Thing for this eval as well. =head1 LOCAL FILES If you want to use the shell on a local file without setting up a C server to serve the file, you can use the C URI scheme to load it into the "browser": get file:local.html forms =head1 PROXY SUPPORT Currently, the proxy support is realized via a call to the C method of the WWW::Mechanize object, which loads the proxies from the environment. There is no provision made to prevent using proxies (yet). The generated scripts also load their proxies from the environment. =head1 ONLINE HELP The online help feature is currently a bit broken in C, but a fix is in the works. Until then, you can reenable the dynamic online help by patching C : Remove the three lines my $smry = exists $o->{handlers}{$h}{smry} ? $o->summary($h) : "undocumented"; in C and replace them by my $smry = $o->summary($h); The shell works without this patch and the online help is still available through C =head1 BUGS Bug reports are very welcome - please use the RT interface at https://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Mechanize-Shell or send a descriptive mail to bug-WWW-Mechanize-Shell@rt.cpan.org . Please try to include as much (relevant) information as possible - a test script that replicates the undesired behaviour is welcome every time! =over 4 =item * The two parameter version of the C command guesses the realm from the last received response. Currently a RE is used to extract the realm, but this fails with some servers resp. in some cases. Use the four parameter version of C, or if not possible, code the extraction in Perl, either in the final script or through C commands. =item * The shell currently detects when you want to follow a JavaScript link and tells you that this is not supported. It would be nicer if there was some callback mechanism to (automatically?) extract URLs from JavaScript-infected links. =back =head1 TODO =over 4 =item * Add XPath expressions (by moving C from HTML::Parser to XML::XMLlib or maybe easier, by tacking Class::XPath onto an HTML tree) =item * Add C as a command ? =item * Optionally silence the HTML::Parser / HTML::Forms warnings about invalid HTML. =back =head1 EXPORT The routine C is exported into the importing namespace. This is mainly for convenience so you can use the following commandline invocation of the shell like with CPAN : perl -MWWW::Mechanize::Shell -e"shell" =head1 COPYRIGHT AND LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Copyright (C) 2002,2003 Max Maischein =head1 AUTHOR Max Maischein, Ecorion@cpan.orgE Please contact me if you find bugs or otherwise improve the module. More tests are also very welcome ! =head1 SEE ALSO L,L =cut