# Refactor.pm - refactor Perl code. # $Header: $ # ############################################################################### =head1 NAME Devel::Refactor - Perl extension for refactoring Perl code. =head1 VERSION $Revision: $ This is the CVS revision number. =head1 SYNOPSIS use Devel::Refactor; my $refactory = Devel::Refactor->new; my ($new_sub_call,$new_sub_code) = $refactory->extract_subroutine($sub_name, $code_snippet); my $files_to_change = $refactory->rename_subroutine('./path/to/dir', 'oldSubName','newSubName'); # $files_to_change is a hashref where keys are file names, and values are # arrays of hashes with line_number => new_text =head1 ABSTRACT Perl module that facilitates refactoring Perl code. =head1 DESCRIPTION The B module is for code refactoring. While B may be used from Perl programs, it is also designed to be used with the B plug-in for the B integrated development environment. =cut package Devel::Refactor; use strict; use warnings; use Cwd; use File::Basename; require Exporter; our @ISA = qw(Exporter); # Items to export into callers namespace by default. Note: do not export # names by default without a very good reason. Use EXPORT_OK instead. # Do not simply export all your public functions/methods/constants. # This allows declaration use Dev::Refactor ':all'; # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK # will save memory. our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( ); our $VERSION = '0.05'; our $DEBUG = 0; # Preloaded methods go here. our %perl_file_extensions = ( '\.pl$' => 1, '\.pm$' => 1, '\.pod$' => 1, ); =head1 CLASS METHODS Just the constructor for now. =head2 new Returns a new B object. =cut # TODO: List the object properties that are initialized. sub new { my $class = shift; $DEBUG = shift; # TODO: Should these really be object properties? No harm I guess, but most # of them are for the extract_subroutine method, and so maybe they # should go in a data structure dedicated to that method? my $self = { sub_name => '', code_snippet => '', return_snippet => '', return_sub_call => '', eval_err => '', scalar_vars => {}, array_vars => {}, hash_vars => {}, local_scalars => {}, loop_scalars => {}, local_arrays => {}, local_hashes => {}, parms => [], inner_retvals => [], outer_retvals => [], perl_file_extensions => { %perl_file_extensions }, }; bless $self, $class; return $self; } =head1 PUBLIC OBJECT METHODS Call on a object returned by new(). =head2 extract_subroutine($new_name,$old_code [,$syntax_check]) Pass it a snippet of Perl code that belongs in its own subroutine as well as a name for that sub. It figures out which variables need to be passed into the sub, and which variables might be passed back. It then produces the sub along with a call to the sub. Hashes and arrays within the code snippet are converted to hashrefs and arrayrefs. If the I argument is true then a sytax check is performed on the refactored code. Example: $new_name = 'newSub'; $old_code = <<'eos'; my @results; my %hash; my $date = localtime; $hash{foo} = 'value 1'; $hash{bar} = 'value 2'; for my $loopvar (@array) { print "Checking $loopvar\n"; push @results, $hash{$loopvar} || ''; } eos ($new_sub_call,$new_code) = $refactory->extract_subroutine($new_name,$old_code); # $new_sub_call is 'my ($date, $hash, $results) = newSub (\@array);' # $new_code is # sub newSub { # my $array = shift; # # my @results; # my %hash; # my $date = localtime; # $hash{foo} = 'value 1'; # $hash{bar} = 'value 2'; # for my $loopvar (@$array) { # print "Checking $loopvar\n"; # push @results, $hash{$loopvar} || ''; # } # # # return ($date, \%hash, \@results); # } Included in the examples directory is a script for use in KDE under Linux. The script gets its code snippet from the KDE clipboard and returns the transformed code the same way. The new sub name is prompted for via STDIN. =cut sub extract_subroutine { my $self = shift; my $sub_name = shift; my $code_snippet = shift; my $syntax_check = shift; $DEBUG and print STDERR "sub name : $sub_name\n"; $DEBUG and print STDERR "snippet : $code_snippet\n"; $self->{sub_name} = $sub_name; $self->{code_snippet} = $code_snippet; $self->_parse_vars(); $self->_parse_local_vars(); $self->_transform_snippet(); if ($syntax_check) { $self->_syntax_check(); } return ( @$self{'return_sub_call','return_snippet'} ); } =head2 rename_subroutine($where,$old_name,$new_name,[$max_depth]) I is one of: path-to-file path-to-directory If I is a directory then all Perl files (default is C<.pl>, C<.pm>, and C<.pod> See the B method.) in that directory and its' descendents (to I deep,) are searched. Default for I is 0 -- just the directory itself; I of 1 means the specified directory, and it's immeadiate sub-directories; I of 2 means the specified directory, it's sub-directories, and their sub-directrories, and so forth. If you want to scan very deep, use a high number like 99. If no matches are found then returns I, otherwise: Returns a hashref that tells you which files you might want to change, and for each file gives you the line numbers and proposed new text for that line. The hashref looks like this, where I was found on two lines in the first file and on one line in the second file: { ./path/to/file1.pl => [ { 11 => "if (myClass->newName($x)) {\n" }, { 27 => "my $result = myClass->newName($foo);\n"}, ], ./path/to/file2.pm => [ { 235 => "sub newName {\n"}, ], } The keys are paths to individual files. The values are arraryrefs containing hashrefs where the keys are the line numbers where I was found and the values are the proposed new line, with I changed to I. =cut sub rename_subroutine { my $self = shift; my $where = shift; my $old_name = shift; my $new_name = shift; my $max_depth = shift || 0; # How many level to descend into directories return undef unless ($new_name and $old_name); $DEBUG and warn "Looking for $where in ", getcwd(), "\n"; my $found = {}; # hashref of file names if (-f $where){ # it's a file or filehandle $found->{$where} = $self->_scan_file_for_string ($old_name,$new_name,$where); } elsif ( -d $where ) { # it's a directory or directory handle $self->_scan_directory_for_string($old_name,$new_name,$where,$found,$max_depth); } else { # uh oh. Should we allow it to be a code snippet? die "'$where' does not appear to be a file nor a directory." } return %$found ? $found : undef; } =head2 is_perlfile($filename) Takes a filename or path and returns true if the file has one of the extensions in B, otherwise returns false. =cut sub is_perlfile { my ($self,$filename) = @_; my ($name,$path,$suffix) = fileparse($filename,keys %{$self->perl_file_extensions}); return $suffix; } =head1 OBJECT ACCESSORS These object methods return various data structures that may be stored in a B object. In some cases the method also allows setting the property, e.g. B. =cut =head2 get_new_code Returns the I object property. =cut sub get_new_code{ my $self = shift; return $self->{return_snippet}; } =head2 get_eval_results Returns the I object property. =cut sub get_eval_results{ my $self = shift; return $self->{eval_err}; } =head2 get_sub_call Returns the I object property. =cut sub get_sub_call{ my $self = shift; return $self->{return_sub_call}; } =head2 get_scalars Returns an array of the keys from I object property. =cut sub get_scalars { my $self = shift; return sort keys %{ $self->{scalar_vars} }; } =head2 get_arrays Returns an array of the keys from the I object property. =cut sub get_arrays { my $self = shift; return sort keys %{ $self->{array_vars} }; } =head2 get_hashes Returns an array of the keys from the I object property. =cut sub get_hashes { my $self = shift; return sort keys %{ $self->{hash_vars} }; } =head2 get_local_scalars Returns an array of the keys from the I object property. =cut sub get_local_scalars { my $self = shift; return sort keys %{ $self->{local_scalars} }; } =head2 get_local_arrays Returns an array of the keys from the I object property. =cut sub get_local_arrays { my $self = shift; return sort keys %{ $self->{local_arrays} }; } =head2 get_local_hashes Returns an array of the keys from the I object property. =cut sub get_local_hashes { my $self = shift; return sort keys %{ $self->{local_hashes} }; } =head2 perl_file_extensions([$arrayref|$hashref]) Returns a hashref where the keys are regular expressions that match filename extensions that we think are for Perl files. Default are C<.pl>, C<.pm>, and C<.pod> If passed a hashref then it replaces the current values for this object. The keys should be regular expressions, e.g. C<\.cgi$>. If passed an arrayref then the list of values are added as valid Perl filename extensions. The list should be filename extensions, NOT regular expressions, For example: my @additonal_filetypes = qw( .ipl .cgi ); my $new_hash = $refactory->perl_file_extensions(\@additional_filetypes); # $new_hash = { # '\.pl$' => 1, # '\.pm$' => 1, # '\.pod$' => 1, # '\.ipl$' => 1, # '\.cgi$' => 1, # '\.t$' => 1, # } =cut sub perl_file_extensions { my($self,$args) = @_; if (ref $args eq 'HASH') { $self->{perl_file_extensions} = $args; } elsif (ref $args eq 'ARRAY') { map $self->{perl_file_extensions}->{"\\$_\$"} = 1 , @$args; } return $self->{perl_file_extensions}; } =head1 TODO LIST =over 2 =item Come up with a more uniform approach to B. =item Add more refactoring features, such as I. =item Add a SEE ALSO section with URLs for eclipse/EPIC, refactoring.com, etc. =back =cut ################################################################################### ############################## Utility Methods #################################### sub _parse_vars { my $self = shift; my $var; my $hint; # find the variables while ( $self->{code_snippet} =~ /([\$\@]\w+?)(\W\W)/g ) { $var = $1; $hint = $2; if ( $hint =~ /^{/ ) { #}/ $var =~ s/\$/\%/; $self->{hash_vars}->{$var}++; } elsif ( $hint =~ /^\[>/ ) { $var =~ s/\$/\@/; $self->{array_vars}->{$var}++; } elsif ( $var =~ /^\@/ ){ $self->{array_vars}->{$var}++; } elsif ( $var =~ /^\%/ ) { $self->{hash_vars}->{$var}++; } else { $self->{scalar_vars}->{$var}++; } } } sub _parse_local_vars { my $self = shift; my $reg; my $reg2; my $reg3; # To find loops variables declared in for and foreach # figure out which are declared in the snippet foreach my $var ( keys %{ $self->{scalar_vars} } ) { $reg = "\\s*my\\s*\\$var\\s*[=;\(]"; $reg2 = "\\s*my\\s*\\(.*?\\$var.*?\\)"; $reg3 = "(?:for|foreach)\\s+my\\s*\\$var\\s*\\("; if ( $var =~ /(?:\$\d+$|\$[ab]$)/ ) { $self->{local_scalars}->{$var}++; } elsif ( $self->{code_snippet} =~ /$reg|$reg2/ ) { $self->{local_scalars}->{$var}++; # skip loop variables if ( $self->{code_snippet} =~ /$reg3/ ) { $self->{loop_scalars}->{$var}++; } } } foreach my $var ( keys %{ $self->{array_vars}} ) { $var =~ s/\$/\@/; $reg = "\\s*my\\s*\\$var\\s*[=;\(]"; $reg2 = "\\s*my\\s*\\(.*?\\$var.*?\\)"; if ( $self->{code_snippet} =~ /$reg|$reg2/ ) { $self->{local_arrays}->{$var}++; } } foreach my $var ( keys %{ $self->{hash_vars}} ) { $var =~ s/\$/\%/; $reg = "\\s*my\\s*\\$var\\s*[=;\(]"; $reg2 = "\\s*my\\s*\\(.*?\\$var.*?\\)"; if ( $self->{code_snippet} =~ /$reg|$reg2/ ) { $self->{local_hashes}->{$var}++; } } } sub _syntax_check{ my $self = shift; my $tmp; my $eval_stmt = "my (". join ', ', @{$self->{parms}}; $eval_stmt .= ");\n"; $eval_stmt .= $self->get_sub_call(); $eval_stmt .= $self->get_new_code(); $self->{eval_code} = $eval_stmt; eval " $eval_stmt "; if ($@) { $self->{eval_err} = $@; my @errs = split /\n/, $self->{eval_err}; my @tmp = split /\n/, $self->{return_snippet}; my $line; foreach my $err (@errs){ if ($err =~ /line\s(\d+)/){ $line = ($1 - 3); $tmp[$line] .= " #<--- ".$err; } } $self->{return_snippet} = join "\n", @tmp; } } sub _transform_snippet { my $self = shift; my $reg; my $reg2; my $arref; my $href; # Create a sub call that accepts all non-locally declared # vars as parameters foreach my $parm ( keys %{$self->{scalar_vars} } ) { if ( !defined( $self->{local_scalars}->{$parm} ) ) { push @{$self->{parms}}, $parm; } else { # Don't return loop variables next if grep $parm eq $_, keys %{$self->{loop_scalars}}; if ( $parm !~ /\$\d+$/ ) { push @{$self->{inner_retvals}}, $parm; push @{$self->{outer_retvals}}, $parm; } } } foreach my $parm ( keys %{ $self->{array_vars}} ) { $parm =~ s/\$/\@/; if ( !defined( $self->{local_arrays}->{$parm} ) ) { push @{$self->{parms}}, $parm; $reg2 = "\\$parm"; ($arref = $parm) =~ s/\@/\$/; $self->{code_snippet} =~ s/$reg2/\@$arref/g; $parm =~ s/\@/\$/; $reg = "\\$parm\\["; $self->{code_snippet} =~ s/$reg/$parm\-\>\[/g; } else { push @{$self->{inner_retvals}}, "\\$parm"; # \@array push @{$self->{outer_retvals}}, "$parm"; } } foreach my $parm ( keys %{ $self->{hash_vars} } ) { $parm =~ s/\$/\%/; if ( !defined( $self->{local_hashes}->{$parm} ) ) { push @{$self->{parms}}, $parm; $reg2 = "\\$parm"; ($href = $parm) =~ s/\%/\$/; $self->{code_snippet} =~ s/$reg2/\%$href/g; $parm =~ s/\%/\$/; $reg = "\\$parm\\{"; $self->{code_snippet} =~ s/$reg/$parm\-\>\{/g; } else { push @{$self->{inner_retvals}}, "\\$parm"; # \%hash push @{$self->{outer_retvals}}, "$parm"; } } my $retval; my $return_call; my $tmp; $return_call .= "my ("; $return_call .= join ', ', map {my $tmp; ($tmp = $_) =~ s/[\@\%](.*)/\$$1/; $tmp} sort @{$self->{outer_retvals}}; $return_call .= ") = ".$self->{sub_name}." ("; $return_call .= join ', ', map { ( $tmp = $_ ) =~ s/(\%|\@)(.*)/\\$1$2/; $tmp } @{$self->{parms}}; $return_call .= ");\n"; $retval = "sub ".$self->{sub_name}." {\n"; $retval .= join '', map {($tmp = $_) =~ tr/%@/$/; " my $tmp = shift;\n" } @{$self->{parms}}; $retval .= "\n" . $self->{code_snippet}; $retval .= "\n return ("; $retval .= join ', ', sort @{$self->{inner_retvals}}; $retval .= ");\n"; $retval .= "}\n"; # protect quotes and dollar signs # $retval =~ s/\"/\\"/g; # $retval =~ s/(\$)/\\$1/g; $self->{return_snippet} = $retval; $self->{return_sub_call} = $return_call; } # returns arrayref of hashrefs, or undef sub _scan_file_for_string { my $self = shift; my $old_name = shift; my $new_name = shift; my $file = shift; my $fh; open $fh, "$file" || die("Could not open code file '$file' - $!"); my $line_number = 0; my @lines; my $regex1 = '(\W)(' . $old_name . ')(\W)'; # Surrounded by non-word characters my $regex2 = "^$old_name(" . '\W)'; # At start of line while (<$fh>) { $line_number++; # Look for $old_name surrounded by non-word characters, or at start of line if (/$regex1/o or /$regex2/o) { my $new_line = $_; $new_line =~ s/$regex1/$1$new_name$3/g; $new_line =~ s/$regex2/$new_name$1/; my $hash = {$line_number => $new_line}; push @lines, $hash; } } close $fh; return @lines ? \@lines : undef; } # Scan a directory, possibly recuring into sub-directories. sub _scan_directory_for_string { my ($self,$old_name,$new_name,$where,$hash,$depth) = @_; my $dh; opendir $dh, $where || die "Could not open directory '$where': $!"; my @files = grep { $_ ne '.' and $_ ne '..' } readdir $dh; close $dh; $depth--; foreach my $file (@files) { $file = "$where/$file"; # add the directory back on to the path if (-f $file && $self->is_perlfile($file)) { $hash->{$file} = $self->_scan_file_for_string($old_name,$new_name,$file); } if (-d $file && $depth >= 0) { # It's a directory, so call this method on the directory. $self->_scan_directory_for_string($old_name,$new_name,$file,$hash,$depth); } } return $hash; } 1; # File must return true when compiled. Keep Perl happy, snuggly and warm. __END__ =head1 AUTHOR Scott Sotka, Essotka@barracudanetworks.comE =head1 COPYRIGHT AND LICENSE Copyright 2005 by Scott Sotka This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut