package Net::Amazon::MechanicalTurk::Command::RetrieveResults; use strict; use warnings; use Carp; use Net::Amazon::MechanicalTurk::RowData; use Net::Amazon::MechanicalTurk::DelimitedWriter; our $VERSION = '1.00'; =head1 NAME Net::Amazon::MechanicalTurk::Command::RetrieveResults - Retrieves answer data. =head1 SYNOPSIS $mturk->retrieveResults( input => "loadhits-success.csv", output => "loadhits-results.csv", progress => \*STDOUT ); =head1 C retrieveResults Retrieves the available answers for each HITId in input and stores the answers in output. input - The input parameter is something which should produce rows of data (hashes) with at least one column named HITId. The case does not matter, however all rows should use the same column name. Row 1 should not have hitid, if row 2 has HITId. This parameter may be of the following types: - Net::Amazon::MechanicalTurk::RowData - An array of hashes. (This is internally converted into an object of type: Net::Amazon::MechanicalTurk::RowData::ArrayHashRowData) - A reference to a subroutine. When the retrieveResults method asks for row data, the subroutine will be called and passed a subroutine reference, which should be called for every row generated by the input. The generated row should be a hash reference. (This is internally converted into an object of type Net::Amazon::MechanicalTurk::RowData::SubroutineRowData) - The name of a file. The file should be either a CSV or tab delimited file. If the file name ends with '.csv', it will read as a CSV, otherwise it is assumed to be tab delimited. The first row in the file should contain the column names. Each subsequence row becomes a hash reference based on the column names. (This is internally converted into an object of type Net::Amazon::MechanicalTurk::RowData::DelimitedRowData) output - Used to write answer information back to some form of data storage. This parameter may be of the following types: - A filename. The answers for an assignment will be written to the file name as a delimited file. It will be a CSV file if the file name ends with .csv, otherwise it will be a tab delimited file. The file will have the following named columns: AssignmentId AssignmentStatus HITId WorkerId There will be unnamed columns after those. These columns will come in pairs. A pair will have a question id and then a textual form of the answer, for that question. - A subroutine. The subroutine will be called back with a hash containing the following values: - mturk - A handle to the mturk client. - HITId - The HITId for the assignment. - row - The row from the input. - assignment - An assignment object as returned by GetAssignmentsForHITAll. =cut sub retrieveResults { my $mturk = shift; my %params = @_; foreach my $required (qw{ input output }) { if (!exists $params{$required}) { Carp::croak("Missing required parameter $required."); } } my $progress = progressBlock($params{progress}); my $input = Net::Amazon::MechanicalTurk::RowData->toRowData($params{input}); my $output = outputBlock($params{output}); my $start = time(); if ($progress) { $progress->("--[Initializing] " . scalar localtime() . " ---"); $progress->(" URL: " . $mturk->serviceUrl); $progress->(" Input: $params{input}"); $progress->(" Output: $params{output}"); } my $hitIdKey; my $hits = 0; my $results = 0; my $rowno = 0; $progress->("--[Retrieving results] " . scalar localtime() . " ---") if ($progress); $input->each(sub { my ($_self, $row) = @_; # Need to find the column name of the HITId key # The loop is for case insensitive matching and ignoring white space. if (!defined $hitIdKey) { while (my ($key,$value) = each %$row) { if ($key =~ /^\s*HITId\s*$/i) { $hitIdKey = $key; last; } } if (!defined $hitIdKey) { Carp::croak("Couldn't find HITId column. Row has the following columns: " . join(", ", sort keys (%$row))); } } $rowno++; if (!exists $row->{$hitIdKey}) { Carp::croak("Couldn't find HITId column on row $rowno."); } my $hitId = $row->{$hitIdKey}; $hitId =~ s/^\s+//; $hitId =~ s/\s+$//; $progress->(" Getting assignments for HIT $hitId.") if $progress; my $assignments = $mturk->GetAssignmentsForHITAll( HITId => $hitId ); while (my $assignment = $assignments->next) { $output->( HITId => $hitId, mturk => $mturk, row => $row, assignment => $assignment ); $results++; } $hits++; }); if ($progress) { $progress->(" Examined $hits hits."); $progress->(" Downloaded $results answers."); $progress->("--[Done retrieving results] " . scalar localtime() . " ---"); $progress->(" Total load time: " . (time() - $start) . " seconds."); } } sub progressBlock { my ($progress) = @_; if (!defined($progress)) { return $progress; } elsif (UNIVERSAL::isa($progress, "CODE")) { return $progress; } elsif (UNIVERSAL::isa($progress, "GLOB")) { return sub { print $progress @_, "\n"; }; } else { Carp::croak("The progress parameters should be an IO handle or a subroutine."); } } sub outputBlock { my ($output) = @_; if (UNIVERSAL::isa($output, "CODE")) { return $output; } else { return createOutputBlock($output); } } sub createOutputBlock { my ($file) = @_; my $out; my $rowNumber = 0; if (UNIVERSAL::isa($file, "GLOB")) { $out = Net::Amazon::MechanicalTurk::DelimitedWriter->new( output => $file, fieldSeparator => "\t" ); } elsif (UNIVERSAL::isa($file, "Net::Amazon::MechanicalTurk::DelimitedWriter")) { $out = $file; } else { my $fs = ($file =~ /\.csv$/i) ? "," : "."; $out = Net::Amazon::MechanicalTurk::DelimitedWriter->new( lazy => 1, file => $file, fieldSeparator => $fs, autoflush => 1 ); } my $row = 0; return sub { return writeOutput(out => $out, rowNumber => ++$row, @_); }; } sub writeOutput { my %params = @_; my $out = $params{out}; my $rowNumber = $params{rowNumber}; my $mturk = $params{mturk}; my $assignment = $params{assignment}; if ($rowNumber == 1) { $out->write(qw{ AssignmentId AssignmentStatus HITId WorkerId Answers }); } my $answers = $mturk->parseAssignmentAnswer($assignment); my @row = ( $assignment->{AssignmentId}[0], $assignment->{AssignmentStatus}[0], $assignment->{HITId}[0], $assignment->{WorkerId}[0] ); $answers->eachAnswerValue(sub { my ($questionId, $answerText) = @_; push(@row, $questionId, $answerText); }); $out->write(@row); } return 1;