########################################################## ## This script is part of the Devel::NYTProf distribution ## ## Copyright, contact and other information can be found ## at the bottom of this file, or by going to: ## http://search.cpan.org/~akaplan/Devel-NYTProf ## ########################################################### package Devel::NYTProf::Reader; use warnings; use strict; use Carp; use vars qw/$VERSION/; BEGIN { our $VERSION = '1.12'; # must match NYTProf.pm b/c dumb MakeMaker require XSLoader; XSLoader::load('Devel::NYTProf', $Devel::NYTProf::Reader::VERSION); # Provides Devel::NYTProf::Reader::process(); } # These control the limits for what the script will consider ok to severe times # specified in standard deviations from the mean time use constant SEVERITY_SEVERE => 2.0; # above this deviation, a bottleneck use constant SEVERITY_BAD => 1.0; use constant SEVERITY_GOOD => 0.5; # within this deviation, okay sub new { my $class = shift; my $self = { file => 'nytprof.out', output_dir => '.', suffix => '.csv', header => "# Profile data generated by Devel::NYTProf::Reader\n" ."# Version: v$VERSION\n" ."# Author: Adam Kaplan. More information at " ."http://search.cpan.org/~akaplan\n" ."# Format: time,calls,time/call,code\n", datastart => '', line => [ {}, { value => 'time', end => ',', default => '0'}, { value => 'calls', end => ',', default => '0'}, { value => 'time/call', end => ',', default => '0'}, { value => 'source', end => '', default => '' }, { end => "\n" } ], dataend => '', footer => '', taintmsg => "# WARNING!\n" ."# The source file used in generating this report has been modified\n" ."# since generating the profiler database. It might be out of sync\n", # -- OTHER STUFF -- replacements => [ {pattern => '!~FILENAME~!', replace => "\$FILE"}, {pattern => '!~DEV_CALLS~!', replace => "\$statistics{calls}->[0]"}, {pattern => '!~DEV_TIME~!', replace => "\$statistics{time}->[0]"}, {pattern => '!~DEV_TIME/CALL~!', replace => "\$statistics{'time/calls'}"}, {pattern => '!~MEAN_CALLS~!', replace => "\$statistics{calls}->[1]"}, {pattern => '!~MEAN_TIME~!', replace => "\$statistics{time}->[1]"}, {pattern => '!~MEAN_TIME/CALLS~!', replace => "\$statistics{'time/calls'}->[1]"}, {pattern => '!~TOTAL_CALLS~!', replace => "\$self->{filestats}->{\$filestr}->{'calls'}"}, {pattern => '!~TOTAL_TIME~!', replace => "\$self->{filestats}->{\$filestr}->{'time'}"}, ], callsfunc => undef, timefunc => undef, 'time/callsfunc' => undef, numeric_precision => { time => 7, calls => 0, 'time/call' => 7 }, }; if (defined $_[0]) { $self->{file} = $_[0]; } bless($self, $class); $self->{data} = process($self->{file}); $self->{profile_db_time} = getDatabaseTime(); return $self; } ## sub setParam { my ($self, $param, $value) = @_; if ($param eq 'linestart') { $self->{line}->[0] = $value; } elsif ($param eq 'column1') { $self->{line}->[1] = $value; } elsif ($param eq 'column2') { $self->{line}->[2] = $value; } elsif ($param eq 'column3') { $self->{line}->[3] = $value; } elsif ($param eq 'column4') { $self->{line}->[4] = $value; } elsif ($param eq 'lineend') { $self->{line}->[5] = $value; } elsif (!exists $self->{$param}) { confess "Attempt to set $param to $value failed: $param is not a valid " ."parameter\n"; } else { return $self->{$param} unless defined ($value); $self->{$param} = $value; } undef; } ## sub addRegexp { my ($self, $pattern, $replace) = @_; push (@{$self->{user_regexp}}, {pattern => $pattern, replace => $replace}); } # calculate the standard deviation for this data set # NOT USED. This will work very well in code that is normalized (i.e. the same # time is spent in every file) but is fairly useless in practice... # use it if you want. sub calculate_sd { my $stats = shift; $stats = [sort {$a <=> $b} @$stats]; my $sum; map ($sum += $_, @$stats); my $mean = $sum / scalar @$stats; $sum = 0; map ($sum += ($_ - $mean) ** 2, @$stats); return [sqrt (abs ($sum / scalar @$stats)), $mean]; } ## Use average distance from the median value. Highly resistant to extremes sub calculate_median_absolute_deviation { my $stats = shift; $stats = [sort {$a <=> $b} @$stats]; my $median = $stats->[int (scalar(@$stats) / 2)]; my $sum; map ($sum += abs($_ - $median), @$stats); return [$sum / scalar @$stats, $median]; } ## sub _test_file { my $self = shift; my $file = shift; unless (-f $file) { carp "Unable to locate source file: $file\n"; return 0; } return 1 if (stat $file)[9] > $self->{profile_db_time}; 0; } ## sub _output_additional { my ($self, $fname, $content) = @_; open (OUT, '>', "$self->{output_dir}/$fname") or confess "Unable to open $self->{output_dir}/$fname for writing; $!\n"; print OUT @$content; close OUT; } ## sub getFileStats { my $self = shift; return $self->{filestats}; } ## sub outputDir { my ($self, $dir) = @_; return $self->{output_dir} unless defined($dir); if (!mkdir $dir) { confess "Unable to create directory $dir: $!\n" if !$! =~ /exists/; } $self->{output_dir} = $dir; } ## sub report { my $self = shift; foreach my $filestr (keys %{$self->{data}}) { # test file modification date. Files that have been touched after the # profiling was done may very well produce useless output since the source # file might differ from what it looked like before. my $tainted = $self->_test_file($filestr); my %totalsAccum; # holds all line times. used to find median my %totalsByLine; # holds individual line stats my $runningTotalTime; # holds the running total # (should equal sum of $totalsAccum) my $runningTotalCalls; # holds the running total number of calls. foreach my $key (keys %{$self->{data}->{$filestr}}) { my $a = $self->{data}->{$filestr}->{$key}; if (0 == $a->[1]) { # The debugger cannot stop on BEGIN{...} lines. A line in a begin # may set a scalar reference to something that needs to be eval'd later. # as a result, if the variable is expanded outside of the BEGIN, we'll # see the original BEGIN line, but it won't have any calls or times # associated. This will cause a divide by zero error. $a->[1] = 1; } push(@{$totalsAccum{'time'}}, $a->[0]); push(@{$totalsAccum{'calls'}}, $a->[1]); push(@{$totalsAccum{'time/call'}}, $a->[0] / $a->[1]); $totalsByLine{$key}->{'time'} += $a->[0]; $totalsByLine{$key}->{'calls'} += $a->[1]; $totalsByLine{$key}->{'time/call'} = $totalsByLine{$key}->{'time'} / $totalsByLine{$key}->{'calls'}; $runningTotalTime += $a->[0]; $runningTotalCalls += $a->[1]; } $self->{filestats}->{$filestr}->{'time'} = $runningTotalTime; $self->{filestats}->{$filestr}->{'calls'} = $runningTotalCalls; $self->{filestats}->{$filestr}->{'time/call'} = $runningTotalTime / $runningTotalCalls; # Use Median Absolute Deviation Formula to get file deviations for each of # calls, time and time/call values my %statistics = ( 'calls' => calculate_median_absolute_deviation($totalsAccum{'calls'}), 'time' => calculate_median_absolute_deviation($totalsAccum{'time'}), 'time/call' => calculate_median_absolute_deviation($totalsAccum{'time/call'}), ); # discover file path my $fname = $filestr; foreach (@INC) { $_ = '\.' if ($_ eq '.'); $fname =~ s/^$_//; } $fname =~ s#^[/\\]##o; # nuke leading / or \ $fname =~ s#[/\\]#-#go; # replace / and \ with html safe - $self->{filestats}->{$filestr}->{html_safe} = $fname; # localize header and footer for variable replacement my $header = $self->{header}; my $footer = $self->{footer}; my $taintmsg = $self->{taintmsg}; my $datastart = $self->{datastart}; my $dataend = $self->{dataend}; my $FILE = $filestr; foreach my $transform (@{$self->{replacements}}) { my $pattern = $transform->{pattern}; my $replace = $transform->{replace}; if ($pattern =~ m/^!~\w+~!$/) { # replace variable content $replace = eval $replace; $header =~ s/$pattern/$replace/g; $footer =~ s/$pattern/$replace/g; $taintmsg =~ s/$pattern/$replace/g; $datastart =~ s/$pattern/$replace/g; $dataend =~ s/$pattern/$replace/g; } } # open output file open (OUT, "> $self->{output_dir}/$fname$self->{suffix}") or confess "Unable to open $self->{output_dir}/$fname$self->{suffix} " ."for writing: $!\n"; # begin output print OUT $header; print OUT $taintmsg if $tainted; print OUT $datastart; if (! open (IN, $filestr)) { confess "Unable to open $filestr for reading: $!\n" .'Try running again in the same directory as you ran Devel::NYTProf,' ."or ensure \@INC is correct.\n"; } my $LINE = 1; # actual line number. PATTERN variable, DO NOT CHANGE foreach my $line () { chomp $line; foreach my $regexp (@{$self->{user_regexp}}) { $line =~ s/$regexp->{pattern}/$regexp->{replace}/g; } # can we get the main package for this file from this line? { local $1; $line =~ m/^\s*package\s+([\S]*)\s*;/; if (defined $1) { my $p = $1; $p =~ s/\:\:/-/g; my $t = substr($fname, -1 * (length $p) - 3, length $p); if($p eq $t) { $self->{data}->{$filestr}->{package} = $1; } } } # begin output foreach my $hash (@{$self->{line}}) { # If a function reference is provided, it will control ALL output. if (defined $hash->{func}) { if ($hash->{value}) { print OUT $hash->{func}($hash->{value}, $totalsByLine{$LINE}->{$hash->{value}}, $statistics{$hash->{value}}); } else { print OUT $hash->{func}($hash->{value}); } next; } print OUT $hash->{start} if defined $hash->{start}; if (defined $hash->{value}) { if ($hash->{value} eq 'source') { print OUT $line; # from source rather than profile db } elsif ($hash->{value} eq 'line') { print OUT $LINE; } elsif (exists $self->{data}->{$filestr}->{$LINE}) { print OUT sprintf("%0.".$self->{numeric_precision}->{$hash->{value}}."lf", $totalsByLine{$LINE}->{$hash->{value}}); } else { print OUT $hash->{default}; } } print OUT $hash->{end} if defined $hash->{end}; } # Increment line number counters $LINE++; } print OUT $dataend; print OUT $footer; close OUT; } } 1; __END__ =head1 NAME Devel::NYTProf::Reader - Tranforms L output into comprehensive, easy to read reports in (nearly) arbitrary format. =head1 SYNOPSIS # This module comes with two scripts that implement it: # # nytprofhtml - create an html report with statistics highlighting # nytprofcsv - create a basic comma delimited report # # They are in the bin directory of your perl path, so add that to your PATH. # # The csv script is simple, and really only provided as a starting point # for creating other custom reports. You should refer to the html script # for advanced usage and statistics. # First run some code through the profiler to generate the nytprof database. perl -d:NYTProf some_perl.pl # To create an HTML report in ./profiler nytprofhtml # To create a csv report in ./profiler nytprofcsv # Or to generate a simple comma delimited report manually use Devel::NYTProf::Reader; my $reporter = new Devel::NYTProf::Reader('nytprof.out'); # place to store the output $reporter->outputDir($file); # set other options and parameters $reporter->addRegexp('^\s*', ''); # trim leading spaces # generate the report $reporter->report(); # many configuration options exist. See nytprofhtml, advanced example. =head1 HISTORY A bit of history and a shameless plug... NYTProf stands for 'New York Times Profiler'. Indeed, this module was developed by The New York Times Co. to help our developers quickly identify bottlenecks in large Perl applications. The NY Times loves Perl and we hope the community will benefit from our work as much as we have from theirs. Please visit L, our open source blog to see what we are up to, L to see some of our open projects and then check out L for the latest news! =head1 DESCRIPTION L is a speedy line-by-line code profiler for Perl, written in C. This module is a complex framework that processes the output file generated by L It is capable of producing reports of arbitrary format and varying complexity. Basically, for each line of code that was executed and reported, this module will provide the following statistics: =over =item * Total calls =item * Total time =item * Average time per call =item * Deviation of all of the above =item * Line number =item * Source code =back C will process each source file that it can find in your C<@INC> one-by-one. For each line it processes, it will preform transformation and output based instructions that you can optionally provide. The configuration is very robust, supporting variations in field ordering, pattern substitutions (like converting ascii spaces to html spaces), and user callback functions to give you total control. =head1 CONSTRUCTOR =over 4 =item $reporter = Devel::NYTProf::Reader->new( ); =item $reporter = Devel::NYTProf::Reader->new( $FILE ); This method constructs a new C object, parses $FILE and return the new object. By default $FILE will evaluate to './nytprof.out'. See: L for how the profiler works. =back =head1 PARAMETERS Numerous parameters can be set to modify the behavior of C. The following methods are provided: =over 4 =item $reporter->outputDir( $output_directory ); Set the directory that generated files should be placed in. [Default: .] =item $reporter->addRegexp( $pattern, $replace ); Add a regular expression to the top of the pattern stack. Ever line of output will be run through each entry in the pattern stack. For example, to replace spaces, < and > with html entities, you might do: $reporter->addRegexp(' ', ' '); $reporter->addRegexp('<', '<'); $reporter->addRegexp('>', '>'); =item $reporter->setParam( $parameter, $value ); Changes the internal value of $parameter to $value. If $value is omitted, returns the current value of parameter. Basic Parameters: Paramter Description ------------ -------------- suffix The file suffix for the output file header Text printed at the start of the output file taintmsg Text printed ONLY IF source file modification date is later than the profile database modification date. Printed just after header datastart Text printed just before report output and after taintmsg dataend Text printed just after report output footer Text printed at the very end of report output callsfunc Reference to a function which must accept a scalar representing the total calls for a line and returns the output string for that field timesfunc Reference to a function which must accept a scalar representing the total time for a line and returns the output string for that field time/callsfunc Reference to a function which must accept a scalar representing the average time per call for a line and returns the output string for that field Advanced Parameters: Paramter Description -------------- -------------- linestart Printed at the start of each report line lineend Printed at the end of each report line column1 | column2 | The four parameters define what to print in each of column3 | the four output fields. See below column4 | Each of these parameters must be set to a hash reference with any of the following key/value pairs: Key Value ------------- ------------- start string printed at the start of the field end string printed at the end of the field value identifier for the value that this field will hold (can be: time, calls, time/calls, source) default string to be used when there is no value for the field specified in the 'value' key Basic Parameters Defaults: Parameter Default -------------- -------------- suffix '.csv' header "# Profile data generated by Devel::NYTProf::Reader v.$VERSION\n # Author: Adam Kaplan. More information at http://search.cpan.org/~akaplan\n# Format: time,calls, code\n" taintmsg "# WARNING!\n# The source file used in generating this report has been modified\n# since generating the profiler database. It might be out of sync\n" datastart '' dataend '' footer '' callsfunc undef timefunc undef time/callsfunc undef Advanced Parameters Defaults: Parameter Default -------------- -------------- linestart {} lineend { end => "\n" } column1 { value => 'time', end => ',', default => '0'} column2 { value => 'calls', end => ',', default => '0'} column3 { value => 'time/call', end => ',', default => '0'} column4 { value => 'source', end => '', default => '' } =back =head1 SUBROUTINES =over =item $reporter->report( ); Trigger data processing and report generation. This method will die with a message if it fails. The return value is not defined. This is where all of the work is done. =item $reporter->getFileStats( ); When called after calling C<$reporter-Ereport()>, will return a hash containing the cumulative totals for each file. my $stats = $reporter->getStats(); $stats->{FILENAME}->{time}; # might hold 0.25, the total runtime of this file>> Fields are time, calls, time/call, html-safe. =item Devel::NYTProf::Reader::calculate_sd( @stats ); Calculates the standard deviation and mean of the values in @stats, returns them as a list. =item Devel::NYTProf::Reader::calculate_median_absolute_deviation( @stats ); Calculates the absolute median deviation and mean of the values in @stats, returns them as a list. =item $reporter->_output_additional( $file, @data ); If you need to create a static file in the output directory, you can use this subroutine. It is currently used to dump the CSS file into the html output =item $reporter->getDatabaseTime( ); Implemented in XS. This function returns the time that the NYTProf database was created. You should not need to use this. =item $reporter->process( $file ); Implemented in XS. This function tell does the actual parsing of the database. It's fairly complicated and you're better off letting L<new()>> invoke it for you. The return value is a series of nested hash refs and array refs containing the parsed data. =back =head1 BUGS Windows support. I have no idea if it will work, but the profiler will NOT. =head1 EXPORT None by default. Object Oriented. =head1 SEE ALSO Mailing list and discussion at L Public SVN Repository and hacking instructions at L Take a look at the scripts which implement this module, L and L. They are probably all that you will need and provide an excellent jumping point into writing your own custom reports. You'll need to install and run L before you can use anything that implements this module... but this is easy (see L<"SYNOPSIS">) =head1 AUTHOR Adam Kaplan, akaplan at nytimes dotcom =head1 COPYRIGHT AND LICENSE Copyright (C) 2008 by Adam Kaplan and The New York Times Company. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available. =cut