# $Id: Poster.pm,v 1.5 2007/08/02 09:06:25 grant Exp $ package WWW::Sitebase::Poster; use warnings; use strict; use WWW::Sitebase -Base; use IO::Prompt; use Carp; use File::Spec::Functions; use List::Compare; =head1 NAME WWW::Sitebase::Poster - Base class for web site posting routines =head1 VERSION Version 0.4 =cut our $VERSION = '0.4'; =head1 SYNOPSIS package MyPostingModule; use WWW::Sitebase::Poster -Base; # Define your options sub default_options { my $options = super; $options->{cache_file} = { default => 'mypostingmodule' }; # (VERY IMPORTANT) $options->{my_option} = 0; # 0 = not required. 1 means required. $options->{my_option} = { default => 'mydefault' }; # Sets a default for your option. # Some common example options, say for posting messages or comments: $options->{subject} = 1; # Require subject $options->{message} = 1; # Require a message return $options; } # Add accessors if you like (usually a good idea) # (Poster.pm already gives you the cache_file accessor). field 'my_option'; field 'subject'; field 'message'; # Define your send_post method (see examples below) sub send_post { my ( $friend_id ) = @_; $result = $self->browser->do_something( $friend_id, $other_value ); # ... Do anything else you need ... return $result; # $result must be P, R, F, or undef. (Pass, Retry, Fail, or stop) } ---------------- Then you or others can write a script that uses your module. #!/usr/bin/perl -w use MyPostingModule; use WWW::Myspace; my @friend_list = &fancy_friend_gathering_routine; my $poster = new MyPostingModule( browser => new WWW::Myspace, # Note, this'll prompt for username/password friend_ids => \@friend_list, subject => 'hi there!', message => 'I'm writing you a message!', noisy => 1, interactive => 1, ); $poster->post; This is a base class for modules that need to post things and remember to whom they've posted. If you're writing a new module that needs to send something and remember stuff about it, you'll want to look at this module. It gives you all sorts of neat tools, like write_log and read_log to remember what you did, and it automatically parses all your arguments right in the new method, and can even read them from a config file in CFG or YAML format. All the "new" method stuff it just inherits from WWW::Sitebase, so look there for more info. The cache_file is where write_log and read_log write and read their data. You MUST set the cache_file default to something specific to your module. This will be used by the cache_file method to return (and create if needed) the default cache file for your module. Make sure it's unique to "Poster" modules. (Hint: name it after your module). Your default filename will be placed in the value returned by $self->cache_dir (.www-poster by default), so don't specify a path. If you're writing a WWW::Myspace module, you should override cache_dir. See "cache_dir" below. This module itself is a subclass of WWW::Sitebase, so it inherits "new", default_options, and a few other methods from there. Be sure to read up on WWW::Sitebase if you're not familiar with it, as your class will magically inherit those methods too. If you're writing a script that uses a subclass of this module, you can read up on the methods it provides below. =cut =head1 OPTIONS The following options can be passed to the new method, or set using accessor methods (see below). Note that if you're writing a script using a subclass of this module, more options may be available to the specific subclass you're using. Options with sample values: friend_ids => [ 12345, 123456 ], # Arrayref of friendIDs. cache_file => '/path/to/file', max_count => 50, # Maximum number of successful posts before stopping html => 1, # 1=display in HTML, 0=plain text. delay_time => 86400, # Number of seconds to sleep on COUNTER/CAPTCHA interactive => 1, # Can we ask questions? Turns on noisy also. noisy => 1, # Display detailed output (1) or be quiet (0)? browser => $myspace, # A valid, logged-in site browsing object (i.e. WWW::Myspace, # or a subclass of WWW::Sitebase::Navigator). =cut =head2 default_options Override this method to allow additional options to be passed to "new". You should also provide accessor methods for them. These are parsed by Params::Validate. In breif, setting an option to "0" means it's optional, "1" means it's required. See Params::Validate for more info. It looks like this: sub default_options { $self->{default_options} = { friend_ids => 0, cache_file => 0, html => 0, browser => 0, exclude_my_friends => { default => 0 }, interactive => { default => 1 }, noisy => { default => 1 }, max_count => { default => 0 }, }; return $self->{default_options}; } # So to add a "questions" option that's mandatory: sub default_options { super; $self->{default_options}->{questions}=1; return $self->{default_options}; } =cut sub default_options { $self->{default_options} = { friend_ids => 0, cache_file => 0, html => 0, browser => 0, exclude_my_friends => { default => 0 }, interactive => { default => 1 }, noisy => { default => 1 }, max_count => { default => 0 }, }; return $self->{default_options}; } =head2 friend_ids Retreives/sets the list of friendIDs for whom we're going to post things. $message->friend_ids( 12345, 12347, 123456 ); # Set the list of friends @friend_ids = $message->friend_ids; # Retreive the list of friends You can set the friend_ids to a list of friends, an arrayref to a list of friends, or to an object whose "get_friends" method will return the list of friends. When called without arguments, returns a list of friends (even if you set it with an arrayref). =cut sub friend_ids { if ( @_ ) { if ( ref $_[0] ) { $self->{friend_ids} = $_[0]; } else { $self->{friend_ids} = \@_; } } else { # If $self->{friend_ids} is set, it's either an array ref # to a list of friends, or an object that we need to call # "get_friends" on, which will return a list of friends. if ( defined ( $self->{friend_ids} ) ) { if ( ref $self->{friend_ids} eq "ARRAY" ) { return @{ $self->{friend_ids} }; } else { return $self->{friend_ids}->get_friends; } } else { return (); } } } =head2 cache_dir cache_dir sets or returns the directory in which we should store cache data. Defaults to $ENV{'HOME'}/.www-poster. If you're subclassing this module to write a module that will use WWW::Myspace, you should override this method with something like: sub cache_dir { $self->browser->cache_dir( @_ ) } This will put your module's cache data neatly into the same place as the other WWW::Myspace modules' data. =cut # Get and scrub the path to their home directory. our $HOME_DIR= ""; if ( defined $ENV{'HOME'} ) { $HOME_DIR = "$ENV{'HOME'}"; if ( $HOME_DIR =~ /^([\-A-Za-z0-9_ \/\.@\+\\:]*)$/ ) { $HOME_DIR = $1; } else { croak "Invalid characters in $ENV{HOME}."; } } field cache_dir => catfile( "$HOME_DIR", '.www-poster' ); =head2 cache_file Sets or returns the cache filename. This defaults to $self->default_options->{cache_file}->{default} in cache_dir. If you try to call cache_file without a value and you haven't set default_options properly, it'll get really pissed off and throw nasty error messages all over your screen. For convenience this method returns the value in all cases, so you can do this: $cache_file = $commented->cache_file( "filename" ); =cut sub cache_file { if ( @_ ) { $self->{cache_file} = shift; } elsif (! defined $self->{cache_file} ) { # Make the cache directory if it doesn't exist $self->make_cache_dir; $self->{cache_file} = $self->default_options->{cache_file}->{default}; } return $self->{cache_file}; } =head2 cache_path Returns the full path to the cache_file. =cut sub cache_path { # Make the cache directory if it doesn't exist. $self->make_cache_dir; return catfile( $self->cache_dir, $self->cache_file ); } =head2 html( [1] [0] ) Sets to display HTML-friendly output (only really useful with "noisy" turned on also). Call html(1) to display HTML tags (currently just "BR" tags). Call html(0) to display plain text. Text output (html = 0) is enabled by default. Example $comment->html( 1 ); =cut field html => 0; =head2 browser Sets/retreives the site navigation object with which we're logged in. You'll probably just pass that info to the new method, but the accessor is here if you want to use it. Hint: To make your module more site-specific, add a convenience method: sub myspace { $self->browser( @_ ) } or sub bebo { $self->browser( @_ ) } =cut field 'browser'; =head2 exclude_my_friends Sets/retrieves the value of the "exclude_my_friends" flag. If set to a true value, the "post" method will exclude the logged-in user's friends from the list of friendIDs set in the "friend_ids" method. This works by calling the "get_friends" method of the browser object. If the object stored in "browser" doesn't have a "get_friends" method, the "post" routine will die. Note that getting friends can take some time, so it's best to have your friend list properly filtered instead of using this option. But, it's here if you need it. =cut field 'exclude_my_friends'; =head2 interactive If set to 1, allows methods to ask questions by displaying a prompt and reading STDIN. Setting to 0 makes the script run non-interactively. Setting to 1 automatically sets "noisy" to 1 also. =cut sub interactive { if ( @_ ) { ( $self->{interactive} ) = @_; if ( $self->{interactive} ) { $self->noisy(1) } } return $self->{interactive}; } =head2 noisy( [1] [0] ) If set to 1, the module should output status reports for each post. This, of course, will vary by module, and you'll probably want to document any module-specific output in your module. If "noisy" is off (0), run silently, unless there is an error, until you have to stop. Then you may print a report or status. noisy is off (0) by default. =cut field noisy => 0; =head2 max_count Sets or returns the number of posts we should attempt before stopping. Default: 0 (don't stop). This is handy if you want to stop before a CAPTCHA response, or if you want to limit your daily posts. Override this to set a default that's appropriate for your module (i.e. 50 for a Myspace commenting module) =cut field max_count => 0; =head1 POSTING =head2 send_post You must override this method with your posting method. It will be called by the "post" method and passed an ID from the list of friend_ids (set using the option to the "new" method or using the "friend_ids" accessor method). It must return two values: a result code (P, R, F, or undef) and a human-readable reason string. The result codes mean "Pass", "Retry", "Fail", and "stop!" respectively, and the human-readable reason will be used in the report output when the "post" method stops. Example: # Send Myspace group invitations. The send_group_invitation method returns two # array references, one of passed IDs and one of failed. We want to retry any # failures. sub send_post { my ( $id ) = @_; my ( $passed, $failed ) = $self->browser->send_group_invitation( $id ); # We only passed 1 ID, so if "passed" has anything in it, our ID passed. if ( @{ $passed } ) { return 'P', 'Invitation Sent'; } else { return 'R', 'Invitation send failed'; } } # Post a comment on Myspace. There are several possible codes post_comment could # return, so we want to decide for each whether to retry or not. Also, if we reach a # CAPTCHA response, we want to stop. Note that this example assumes your # subclass module defined "subject" and "message" accessors. sub send_post { my ( $id ) = @_; my $result = $self->browser->post_comment( $id, $self->subject, $self->message ); if ( $result eq 'P' ) { return 'P', 'Passed'; } elsif ( $result eq 'FC' ) { return undef; } elsif ( $result eq 'FN' ) { return 'R', "Network error"; } elsif ( $result eq 'FF' ) { return 'F', 'Person is not your friend'; } else { return 'R', 'Failed - reason unknown'; } } =cut stub 'send_post'; =head2 post This is the main method of the module. It is called to do the actual posting. It gathers the friendIDs and loops through them, calling the "send_post" method to send each post. It handles logging each post, and excluding previously-posted friends. =cut sub post { no strict 'refs'; # Check for browser object croak "Must set a valid browser object before calling post method" unless ( $self->browser ); $self->{post_count} = 0; my ( $result, $reason ); my ( @friend_list ) = $self->friend_ids; ( @friend_list ) = $self->_exclude_friends( @friend_list ); unless ( @friend_list ) { $self->_report( "Nothing to process\n" ); return; } foreach my $id ( @friend_list ) { ( $result, $reason ) = $self->send_post( $id ); last unless ( $result ); $self->_record_result( $id, $result, $reason ); $self->{post_count}++ unless ( $result eq 'R' ); last if ( $self->max_count && ( $self->{post_count} > $self->max_count ) ); } $self->_final_report; } =head2 post_count Returns the current number of successful posts (from the internal counter used by the "post" method. # Pause after every 25th post sleep 30 if ( ( $self->post_count % 25 ) == 0 ); =cut sub post_count { $self->{post_count} } sub _record_result { my ( $friend_id, $result, $reason ) = @_; unless ( $result =~ /^[PFR]$/o ) { croak "Invalid result code: \"$result\".\n". "Valid codes are P, R, or F (Pass, Retry, or Fail)."; } $self->write_log( { friend_id => $friend_id, status => $result } ); $self->{reasons}->{$reason}++; } sub _final_report { no strict 'refs'; print "\n\nFinal status report...\n\n######################\n"; foreach my $reason ( keys( %{ $self->{reasons} } ) ) { print $self->{reasons}->{$reason} . " " . $reason; } print "\n"; } sub _exclude_friends { my ( @friend_list ) = @_; my @exclude_list = (); # Exclude our friends if they asked. if ( $self->{'exclude_my_friends'} ) { $self->_report("Getting friend IDs to exclude. This could take a while.\n"); push @exclude_list, $self->browser->get_friends; } # Exclude previous posts $self->_report( "Retreiving list of previous posts\n" ); push @exclude_list, $self->read_posted('all'); # Process the exclusions $self->_report( "Processing exclusions...\n" ); my $lc = List::Compare->new( { lists => [ \@exclude_list, \@friend_list], accelerated => 1, # Only one comparison unsorted => 1, # Unsorted } ); return ( $lc->get_complement ); } =head1 LOGGING METHODS =head2 reset_log( [ $filter ] ) Resets the log file. If passed a subroutine reference in $filter, items matching filter will be left in the log - everything else will be erased. Say for example you wanted to retry all "Failed" items: $filter = sub { ( $_->{'status'} eq "P" ) }; $self->reset_log( $filter ); To delete the log file completely, just do: $self->reset_log; =cut sub reset_log { my ( $filter ) = @_; unless ( defined $filter ) { unlink $self->cache_path or croak @!; $self->{log} = undef; } else { # Read in the items to save $self->read_log( $filter ); # Write that to the exclusions file. $self->write_log('all'); } } #--------------------------------------------------------------------- =head2 write_log( 'all' | $data ) If called with "all", write $self->{log} to the log file. If called with a hash of data, append a line to the log file. $self->write_log( 'all' ); $self->write_log( { friend_id => $friend_id, status => $status } ); If there is a "time" field in the list of log_fields (there is by default), write_log will automatically write the current time (the value returned by the "time" function) to the file. =cut sub write_log { no strict 'refs'; my ( $data ) = @_; my ( $fh, $key_field, $key_value ); # We track who we've posted to in a file. We need to # open and close it each time to make sure everyone # gets stored. if ( $data eq 'all' ) { # Re-write the file (called by reset_exclusions). # ($fh closes when it goes out of scope) open( $fh, ">", $self->cache_path ) or croak @!; foreach $key_value ( sort( keys( %{ $self->{log} } ) ) ) { $self->$print_row( $key_value, $fh ); } } else { # Just append the current data. # ($fh closes when it goes out of scope) open( $fh, ">>", $self->cache_path ) or croak @!; # Write the data into the log hash $key_field = $self->log_fields->[0]; # i.e. "friend_id" $key_value = $data->{"$key_field"}; # i.e. "12345" # Add the time if it's not there unless ( exists $data->{'time'} ) { $data->{'time'} = time; } # Store the rest of the passed data into the log hash. $self->{'log'}->{$key_value} = $data; # Write that row to the log file. $self->$print_row( $key_value, $fh ); } } # print_row( $row_key, $fh ); # Print the row of data from the log hash specified by $row_key to the # file identified by the filehandle reference $fh. my sub print_row { no strict 'refs'; my ( $row_key, $fh ) = @_; # Assemble the row my $row = ""; foreach my $fieldname ( @{ $self->log_fields } ) { ( $row ) && ( $row .= ":" ); $row .= $self->{log}->{$row_key}->{"$fieldname"}; } # Print to the file print $fh "$row\n"; } =head2 log_fields Returns a reference to an array of the columnn names you use in your log file. Defaults to friend_id, status, and time. The first field will be used as your unique key field. Override this method if you want to use different columns in your log file. =cut const 'log_fields' => [ 'friend_id', 'status', 'time' ]; #---------------------------------------------------------------------- =head2 read_log Read items from the log file. The first time it's invoked, it reads the log file contents into $self->{log}, which is also neatly maintained by write_log. This lets you call read_log without worrying about huge performance losses, and also makes it extendable to use SQL in the future. For future compatibility, you should access the log only through read_log (i.e. don't access $self->{log} directly). # Post something unless we've successfully posted before unless ( $self->read_log("$friend_id")->{'status'} =~ /^P/ ) { $myspace->post_something( $friend_id ) } # When did we last post to $friend_id? $last_time = $self->read_log("$friend_id")->{'time'}; if ( $last_time ) { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($last_time); print "Successfully posted to $friend_id on: " . "$mon/$day/$year at $hour:$min:sec\n";; } else { print "I don't remember posting to $friend_id before\n"; } read_log can be called with an optional filter argument, which can be the string "all", or a reference to a subroutine that will be used to filter the returned values. The subroutine will be passed a hashref of fieldnames and values, by default: { friend_id => 12345, status => P, time => time in 'time' format } This lets you do things like this: # Reload the cache in memory ($self->{log}) $self->read_log( 'all' ) # Return a list of friends that we've already posted # ("the 'o' flag means to optimize the RE because the RE is a constant). my $filter = sub { if ( $_->{'status'} =~ /^[PF]$/o ) { 1 } else { 0 } } @posted_friends = $self->read_log( $filter ); # Of course, that's just for example - you'd really do this: @posted_friends = $self->read_log( sub { ( $_[0]->{'status'} =~ /^[PF]$/o ) } ); # or this, which means "return anything that doesn't need to be retried" # (this is, in fact, what "read_posted" (see below) does). @posted_friends = $self->read_log( sub { ( $_[0]->{'status'} ne 'R' ) } ); Only the last post attempt for each key (friend_id by default) is stored in $self->{log}. It is possible for the cache file to have more than one in some circumstances, but only the last will be used, and if the file is re-written, previous entries will be erased. =cut sub read_log { no strict 'refs'; my $filter = ""; ( $filter ) = @_ if ( @_ ); my $status = ""; my $id; my @values; # If we haven't read the log file yet, do it. unless ( ( defined $self->{log} ) && ( $filter ne 'all' ) ) { if ( -f $self->cache_path ) { open( LOGGED, "<", $self->cache_path ) or croak "Can't read cache file: " . $self->cache_path . "\n"; } else { $self->{log} = {}; return $self->{log}; } # There's a cache file, so read it while ( $id = ) { chomp $id; ( @values ) = split( ":", $id ); # Match the values to the appropriate fieldnames my $i = 0; my %data = (); foreach my $value ( @values ) { my $fieldname = $self->log_fields->["$i"]; $data{"$fieldname"}=$value; $i++; } $self->{'log'}->{"$values[0]"} = { %data }; } close LOGGED; } # If we reloaded, we're done. return $self->{log} if ( $filter eq 'all' ); # If they passed a specific key value instead of a filter subroutine, # return the appropriate record if it exists. if ( ( $filter ) && ( ! ref $filter ) ) { if ( exists $self->{log}->{"$filter"} ) { return $self->{log}->{$filter} } else { return ""; } } # Unless we've got a real filter, return. unless ( ref $filter ) { return $self->{log}; } # Return a list of keys that matches their filter my @keys = (); foreach my $key_value ( sort( keys( %{ $self->{log} } ) ) ) { if ( &$filter( $self->{log}->{"$key_value"} ) ) { push( @keys, $key_value ); } } return ( @keys ); } =head2 read_posted Returns the keys of all posted rows (status isn't "R"). my @posted_friends = $self->read_posted; =cut sub read_posted { return ( $self->read_log( sub { ( $_[0]->{'status'} ne 'R' ) } ) ); } =head2 previously_posted( $friend_id ) This convenience method returns true if there's a log entry for a previous successful posting. A posting is considered successful if the status code is "P" or "F". unless ( $self->previously_posted( $friend_id ) ) { $self->post( $friend_id ); } =cut sub previously_posted { return ( $self->read_log( $_[0] )->{'status'} ne 'R' ); } sub _report { print @_ if $self->{'interactive'}; } =head2 make_cache_dir Creates the cache directory in cache_dir. Only creates the top-level directory, croaks if it can't create it. $myspace->cache_dir("/path/to/dir"); $myspace->make_cache_dir; This function mainly exists for the internal login method to use, and for related sub-modules that store their cache files by default in WWW:Myspace's cache directory. =cut sub make_cache_dir { # Make the cache directory if it doesn't exist. unless ( -d $self->cache_dir ) { mkdir $self->cache_dir or croak "Can't create cache directory ". $self->cache_dir; } } # This tells Sitebase we don't want to save the browser field. sub _nosave { my ( $key ) = shift; if ( $key && ( $key eq 'browser' ) ) { return 0 } return 1; } =pod =head1 AUTHOR Grant Grueninger, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc WWW::Sitebase::Poster You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 ACKNOWLEDGEMENTS =head1 COPYRIGHT & LICENSE Copyright 2006 Grant Grueninger, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of WWW::Sitebase::Poster