############################################################
#
#    perltidy - a perl script indenter and formatter
#
#    Copyright (c) 2000-2009 by Steve Hancock
#    Distributed under the GPL license agreement; see file COPYING
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#    For brief instructions, try 'perltidy -h'.
#    For more complete documentation, try 'man perltidy'
#    or visit http://perltidy.sourceforge.net
#
#    This script is an example of the default style.  It was formatted with:
#
#      perltidy Tidy.pm
#
#    Code Contributions:
#      Michael Cartmell supplied code for adaptation to VMS and helped with
#        v-strings.
#      Hugh S. Myers supplied sub streamhandle and the supporting code to
#        create a Perl::Tidy module which can operate on strings, arrays, etc.
#      Yves Orton supplied coding to help detect Windows versions.
#      Axel Rose supplied a patch for MacPerl.
#      Sebastien Aperghis-Tramoni supplied a patch for the defined or operator.
#      Dan Tyrell contributed a patch for binary I/O.
#      Ueli Hugenschmidt contributed a patch for -fpsc
#      Many others have supplied key ideas, suggestions, and bug reports;
#        see the CHANGES file.
#
############################################################

package Perl::Tidy;
use 5.004;    # need IO::File from 5.004 or later
BEGIN { $^W = 1; }    # turn on warnings

use strict;
use Exporter;
use Carp;
$|++;

use vars qw{
  $VERSION
  @ISA
  @EXPORT
  $missing_file_spec
};

@ISA    = qw( Exporter );
@EXPORT = qw( &perltidy );

use IO::File;
use File::Basename;

BEGIN {
    ( $VERSION = q($Id: Tidy.pm,v 1.74 2009/06/16 13:56:49 perltidy Exp $) ) =~ s/^.*\s+(\d+)\/(\d+)\/(\d+).*$/$1$2$3/; # all one line for MakeMaker
}

sub streamhandle {

    # given filename and mode (r or w), create an object which:
    #   has a 'getline' method if mode='r', and
    #   has a 'print' method if mode='w'.
    # The objects also need a 'close' method.
    #
    # How the object is made:
    #
    # if $filename is:     Make object using:
    # ----------------     -----------------
    # '-'                  (STDIN if mode = 'r', STDOUT if mode='w')
    # string               IO::File
    # ARRAY  ref           Perl::Tidy::IOScalarArray (formerly IO::ScalarArray)
    # STRING ref           Perl::Tidy::IOScalar      (formerly IO::Scalar)
    # object               object
    #                      (check for 'print' method for 'w' mode)
    #                      (check for 'getline' method for 'r' mode)
    #
    # Returns the two-element list ( $fh, $name ), where $name is the
    # reference type for a reference argument and the filename otherwise.
    # A warning is issued (and $fh is false) if the handle cannot be made.
    my $ref = ref( my $filename = shift );
    my $mode = shift;
    my $New;
    my $fh;

    # handle a reference
    if ($ref) {
        if ( $ref eq 'ARRAY' ) {
            $New = sub { Perl::Tidy::IOScalarArray->new(@_) };
        }
        elsif ( $ref eq 'SCALAR' ) {
            $New = sub { Perl::Tidy::IOScalar->new(@_) };
        }
        else {

            # Accept an object with a getline method for reading. Note:
            # IO::File is built-in and does not respond to the defined
            # operator.  If this causes trouble, the check can be
            # skipped and we can just let it crash if there is no
            # getline.
            if ( $mode =~ /[rR]/ ) {
                if ( $ref eq 'IO::File' || defined &{ $ref . "::getline" } ) {
                    $New = sub { $filename };
                }
                else {
                    $New = sub { undef };
                    confess <<EOM;
------------------------------------------------------------------------
No 'getline' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy.  Trace follows.
------------------------------------------------------------------------
EOM
                }
            }

            # NOTE(review): everything from the heredoc above down to the
            # IO::File->new(...) closure below was lost from the damaged
            # text of this file.  It has been restored to match the table
            # in the header comment of this sub; please compare the wording
            # of the two diagnostics with the upstream Perl::Tidy source.

            # Accept an object with a print method for writing.
            # See note above about IO::File
            if ( $mode =~ /[wW]/ ) {
                if ( $ref eq 'IO::File' || defined &{ $ref . "::print" } ) {
                    $New = sub { $filename };
                }
                else {
                    $New = sub { undef };
                    confess <<EOM;
------------------------------------------------------------------------
No 'print' method is defined for object of class $ref
Please check your call to Perl::Tidy::perltidy.  Trace follows.
------------------------------------------------------------------------
EOM
                }
            }
        }
    }

    # handle a string
    else {
        if ( $filename eq '-' ) {
            $New = sub { $mode eq 'w' ? *STDOUT : *STDIN }
        }
        else {
            $New = sub { IO::File->new(@_) };
        }
    }

    # an object was given with a mode which is neither 'r' nor 'w'
    unless ($New) {
        confess "Unrecognized mode '$mode' for object of class $ref\n";
    }

    $fh = $New->( $filename, $mode )
      or warn "Couldn't open file:$filename in mode:$mode : $!\n";
    return $fh, ( $ref or $filename );
}

sub find_input_line_ending {

    # Peek at a file and return first line ending character.
    # Quietly return undef in case of any trouble.
    #
    # Returns "\015\012" (dos), "\015" (mac), "\012" (unix), or undef if
    # the first run of line-ending characters is mixed or none is seen in
    # the first 1024 bytes.
    my ($input_file) = @_;
    my $ending;

    # silently ignore input from object or stdin
    if ( ref($input_file) || $input_file eq '-' ) {
        return $ending;
    }

    # Use IO::File with an explicit mode rather than a two-argument open on
    # a bareword handle, so that the name is always taken literally as a
    # file name and no package-global handle is left behind.
    my $fh = IO::File->new( $input_file, 'r' ) or return $ending;
    binmode $fh;
    my $buf;
    read( $fh, $buf, 1024 );
    $fh->close();

    if ( $buf && $buf =~ /([\012\015]+)/ ) {
        my $test = $1;

        # dos
        if ( $test =~ /^(\015\012)+$/ ) { $ending = "\015\012" }

        # mac
        elsif ( $test =~ /^\015+$/ ) { $ending = "\015" }

        # unix
        elsif ( $test =~ /^\012+$/ ) { $ending = "\012" }

        # unknown
        else { }
    }

    # no ending seen
    else { }

    return $ending;
}

sub catfile {

    # concatenate a path and file basename
    # returns undef in case of error

    BEGIN { eval "require File::Spec"; $missing_file_spec = $@; }

    # use File::Spec if we can
    unless ($missing_file_spec) {
        return File::Spec->catfile(@_);
    }

    # Perl 5.004 systems may not have File::Spec so we'll make
    # a simple try.  We assume File::Basename is available.
    # return undef if not successful.
    my $name      = pop @_;
    my $path      = join '/', @_;
    my $test_file = $path . $name;
    my ( $test_name, $test_path ) = fileparse($test_file);
    return $test_file if ( $test_name eq $name );
    return undef if ( $^O eq 'VMS' );

    # this should work at least for Windows and Unix:
    $test_file = $path . '/' . $name;
    ( $test_name, $test_path ) = fileparse($test_file);
    return $test_file if ( $test_name eq $name );
    return undef;
}

sub make_temporary_filename {

    # Make a temporary filename.
    #
    # The POSIX tmpnam() function tends to be unreliable for non-unix
    # systems (at least for the win32 systems that I've tested), so use
    # a pre-defined name.  A slight disadvantage of this is that two
    # perltidy runs in the same working directory may conflict.
    # However, the chance of that is small and manageable by the user.
    # An alternative would be to check for the file's existence and use,
    # say .TMP0, .TMP1, etc, but that scheme has its own problems.  So,
    # keep it simple.
    my $name = "perltidy.TMP";

    # $^O is 'MacOS' on classic Mac systems, so compare without regard to
    # case (a test against 'MacOs' can never succeed).
    if ( $^O =~ /win32|dos/i || $^O eq 'VMS' || $^O =~ /^MacOS$/i ) {
        return $name;
    }
    eval "use POSIX qw(tmpnam)";
    if ($@) { return $name }

    # just make a couple of tries before giving up and using the default
    for ( 0 .. 1 ) {

        # fall back on the default name if tmpnam is unusable at run time
        my $tmpname = eval { tmpnam() };
        return $name unless ( defined($tmpname) && length($tmpname) );

        # O_EXCL: only accept a name which does not already exist
        my $fh = IO::File->new( $tmpname, O_RDWR | O_CREAT | O_EXCL );
        if ($fh) {
            $fh->close();
            return ($tmpname);
        }
    }
    return ($name);
}

# Here is a map of the flow of data from the input source to the output
# line sink:
#
# LineSource-->Tokenizer-->Formatter-->VerticalAligner-->FileWriter-->
#       input         groups                 output
#       lines   tokens      lines       of          lines
#                                       lines
#
# The names correspond to the package names responsible for the unit processes.
#
# The overall process is controlled by the "main" package.
#
# LineSource is the stream of input lines
#
# Tokenizer analyzes a line and breaks it into tokens, peeking ahead
# if necessary.  A token is any section of the input line which should be
# manipulated as a single entity during formatting.  For example, a single
# ',' character is a token, and so is an entire side comment.  It handles
# the complexities of Perl syntax, such as distinguishing between '<<' as
# a shift operator and as a here-document, or distinguishing between '/'
# as a divide symbol and as a pattern delimiter.
#
# Formatter inserts and deletes whitespace between tokens, and breaks
# sequences of tokens at appropriate points as output lines.  It bases its
# decisions on the default rules as modified by any command-line options.
#
# VerticalAligner collects groups of lines together and tries to line up
# certain tokens, such as '=>', '#', and '=' by adding whitespace.
#
# FileWriter simply writes lines to the output stream.
#
# The Logger package, not shown, records significant events and warning
# messages.  It writes a .LOG file, which may be saved with a
# '-log' or a '-g' flag.

{ # variables needed by interrupt handler: my $tokenizer; my $input_file; # this routine may be called to give a status report if interrupted. If a # parameter is given, it will call exit with that parameter. This is no # longer used because it works under Unix but not under Windows. sub interrupt_handler { my $exit_flag = shift; print STDERR "perltidy interrupted"; if ($tokenizer) { my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); print STDERR " at line $input_line_number"; } if ($input_file) { if ( ref $input_file ) { print STDERR " of reference to:" } else { print STDERR " of file:" } print STDERR " $input_file"; } print STDERR "\n"; exit $exit_flag if defined($exit_flag); } sub perltidy { my %defaults = ( argv => undef, destination => undef, formatter => undef, logfile => undef, errorfile => undef, perltidyrc => undef, source => undef, stderr => undef, dump_options => undef, dump_options_type => undef, dump_getopt_flags => undef, dump_options_category => undef, dump_options_range => undef, dump_abbreviations => undef, ); # don't overwrite callers ARGV local @ARGV = @ARGV; my %input_hash = @_; if ( my @bad_keys = grep { !exists $defaults{$_} } keys %input_hash ) { local $" = ')('; my @good_keys = sort keys %defaults; @bad_keys = sort @bad_keys; confess <('dump_options'); my $dump_getopt_flags = $get_hash_ref->('dump_getopt_flags'); my $dump_options_category = $get_hash_ref->('dump_options_category'); my $dump_abbreviations = $get_hash_ref->('dump_abbreviations'); my $dump_options_range = $get_hash_ref->('dump_options_range'); # validate dump_options_type if ( defined($dump_options) ) { unless ( defined($dump_options_type) ) { $dump_options_type = 'perltidyrc'; } unless ( $dump_options_type =~ /^(perltidyrc|full)$/ ) { croak <new(); } # see if ARGV is overridden if ( defined($argv) ) { my $rargv = ref $argv; if ( $rargv eq 'SCALAR' ) { $argv = $$argv; $rargv = undef } # ref to ARRAY if ($rargv) { if ( $rargv eq 'ARRAY' ) { @ARGV = @$argv; } 
else { croak <{$opt} = $flag; } } if ( defined($dump_options_category) ) { $quit_now = 1; %{$dump_options_category} = %{$roption_category}; } if ( defined($dump_options_range) ) { $quit_now = 1; %{$dump_options_range} = %{$roption_range}; } if ( defined($dump_abbreviations) ) { $quit_now = 1; %{$dump_abbreviations} = %{$rexpansion}; } if ( defined($dump_options) ) { $quit_now = 1; %{$dump_options} = %{$rOpts}; } return if ($quit_now); # make printable string of options for this run as possible diagnostic my $readable_options = readable_options( $rOpts, $roption_string ); # dump from command line if ( $rOpts->{'dump-options'} ) { print STDOUT $readable_options; exit 1; } check_options( $rOpts, $is_Windows, $Windows_type, $rpending_complaint ); if ($user_formatter) { $rOpts->{'format'} = 'user'; } # there must be one entry here for every possible format my %default_file_extension = ( tidy => 'tdy', html => 'html', user => '', ); # be sure we have a valid output format unless ( exists $default_file_extension{ $rOpts->{'format'} } ) { my $formats = join ' ', sort map { "'" . $_ . 
"'" } keys %default_file_extension; my $fmt = $rOpts->{'format'}; die "-format='$fmt' but must be one of: $formats\n"; } my $output_extension = make_extension( $rOpts->{'output-file-extension'}, $default_file_extension{ $rOpts->{'format'} }, $dot ); my $backup_extension = make_extension( $rOpts->{'backup-file-extension'}, 'bak', $dot ); my $html_toc_extension = make_extension( $rOpts->{'html-toc-extension'}, 'toc', $dot ); my $html_src_extension = make_extension( $rOpts->{'html-src-extension'}, 'src', $dot ); # check for -b option; my $in_place_modify = $rOpts->{'backup-and-modify-in-place'} && $rOpts->{'format'} eq 'tidy' # silently ignore unless beautify mode && @ARGV > 0; # silently ignore if standard input; # this allows -b to be in a .perltidyrc file # without error messages when running from an editor # turn off -b with warnings in case of conflicts with other options if ($in_place_modify) { if ( $rOpts->{'standard-output'} ) { warn "Ignoring -b; you may not use -b and -st together\n"; $in_place_modify = 0; } if ($destination_stream) { warn "Ignoring -b; you may not specify a destination array and -b together\n"; $in_place_modify = 0; } if ($source_stream) { warn "Ignoring -b; you may not specify a source array and -b together\n"; $in_place_modify = 0; } if ( $rOpts->{'outfile'} ) { warn "Ignoring -b; you may not use -b and -o together\n"; $in_place_modify = 0; } if ( defined( $rOpts->{'output-path'} ) ) { warn "Ignoring -b; you may not use -b and -opath together\n"; $in_place_modify = 0; } } Perl::Tidy::Formatter::check_options($rOpts); if ( $rOpts->{'format'} eq 'html' ) { Perl::Tidy::HtmlWriter->check_options($rOpts); } # make the pattern of file extensions that we shouldn't touch my $forbidden_file_extensions = "(($dot_pattern)(LOG|DEBUG|ERR|TEE)"; if ($output_extension) { my $ext = quotemeta($output_extension); $forbidden_file_extensions .= "|$ext"; } if ( $in_place_modify && $backup_extension ) { my $ext = quotemeta($backup_extension); 
$forbidden_file_extensions .= "|$ext"; } $forbidden_file_extensions .= ')$'; # Create a diagnostics object if requested; # This is only useful for code development my $diagnostics_object = undef; if ( $rOpts->{'DIAGNOSTICS'} ) { $diagnostics_object = Perl::Tidy::Diagnostics->new(); } # no filenames should be given if input is from an array if ($source_stream) { if ( @ARGV > 0 ) { die "You may not specify any filenames when a source array is given\n"; } # we'll stuff the source array into ARGV unshift( @ARGV, $source_stream ); # No special treatment for source stream which is a filename. # This will enable checks for binary files and other bad stuff. $source_stream = undef unless ref($source_stream); } # use stdin by default if no source array and no args else { unshift( @ARGV, '-' ) unless @ARGV; } # loop to process all files in argument list my $number_of_files = @ARGV; my $formatter = undef; $tokenizer = undef; while ( $input_file = shift @ARGV ) { my $fileroot; my $input_file_permissions; #--------------------------------------------------------------- # determine the input file name #--------------------------------------------------------------- if ($source_stream) { $fileroot = "perltidy"; } elsif ( $input_file eq '-' ) { # '-' indicates input from STDIN $fileroot = "perltidy"; # root name to use for .ERR, .LOG, etc $in_place_modify = 0; } else { $fileroot = $input_file; unless ( -e $input_file ) { # file doesn't exist - check for a file glob if ( $input_file =~ /([\?\*\[\{])/ ) { # Windows shell may not remove quotes, so do it my $input_file = $input_file; if ( $input_file =~ /^\'(.+)\'$/ ) { $input_file = $1 } if ( $input_file =~ /^\"(.+)\"$/ ) { $input_file = $1 } my $pattern = fileglob_to_re($input_file); ##eval "/$pattern/"; if ( !$@ && opendir( DIR, './' ) ) { my @files = grep { /$pattern/ && !-d $_ } readdir(DIR); closedir(DIR); if (@files) { unshift @ARGV, @files; next; } } } print "skipping file: '$input_file': no matches found\n"; next; } unless ( 
-f $input_file ) { print "skipping file: $input_file: not a regular file\n"; next; } unless ( ( -T $input_file ) || $rOpts->{'force-read-binary'} ) { print "skipping file: $input_file: Non-text (override with -f)\n"; next; } # we should have a valid filename now $fileroot = $input_file; $input_file_permissions = ( stat $input_file )[2] & 07777; if ( $^O eq 'VMS' ) { ( $fileroot, $dot ) = check_vms_filename($fileroot); } # add option to change path here if ( defined( $rOpts->{'output-path'} ) ) { my ( $base, $old_path ) = fileparse($fileroot); my $new_path = $rOpts->{'output-path'}; unless ( -d $new_path ) { unless ( mkdir $new_path, 0777 ) { die "unable to create directory $new_path: $!\n"; } } my $path = $new_path; $fileroot = catfile( $path, $base ); unless ($fileroot) { die <new( $input_file, $rOpts, $rpending_logfile_message ); next unless ($source_object); # register this file name with the Diagnostics package $diagnostics_object->set_input_file($input_file) if $diagnostics_object; #--------------------------------------------------------------- # determine the output file name #--------------------------------------------------------------- my $output_file = undef; my $actual_output_extension; if ( $rOpts->{'outfile'} ) { if ( $number_of_files <= 1 ) { if ( $rOpts->{'standard-output'} ) { die "You may not use -o and -st together\n"; } elsif ($destination_stream) { die "You may not specify a destination array and -o together\n"; } elsif ( defined( $rOpts->{'output-path'} ) ) { die "You may not specify -o and -opath together\n"; } elsif ( defined( $rOpts->{'output-file-extension'} ) ) { die "You may not specify -o and -oext together\n"; } $output_file = $rOpts->{outfile}; # make sure user gives a file name after -o if ( $output_file =~ /^-/ ) { die "You must specify a valid filename after -o\n"; } # do not overwrite input file with -o if ( defined($input_file_permissions) && ( $output_file eq $input_file ) ) { die "Use 'perltidy -b $input_file' to modify 
in-place\n"; } } else { die "You may not use -o with more than one input file\n"; } } elsif ( $rOpts->{'standard-output'} ) { if ($destination_stream) { die "You may not specify a destination array and -st together\n"; } $output_file = '-'; if ( $number_of_files <= 1 ) { } else { die "You may not use -st with more than one input file\n"; } } elsif ($destination_stream) { $output_file = $destination_stream; } elsif ($source_stream) { # source but no destination goes to stdout $output_file = '-'; } elsif ( $input_file eq '-' ) { $output_file = '-'; } else { if ($in_place_modify) { $output_file = IO::File->new_tmpfile() or die "cannot open temp file for -b option: $!\n"; } else { $actual_output_extension = $output_extension; $output_file = $fileroot . $output_extension; } } # the 'sink_object' knows how to write the output file my $tee_file = $fileroot . $dot . "TEE"; my $line_separator = $rOpts->{'output-line-ending'}; if ( $rOpts->{'preserve-line-endings'} ) { $line_separator = find_input_line_ending($input_file); } # Eventually all I/O may be done with binmode, but for now it is # only done when a user requests a particular line separator # through the -ple or -ole flags my $binmode = 0; if ( defined($line_separator) ) { $binmode = 1 } else { $line_separator = "\n" } my $sink_object = Perl::Tidy::LineSink->new( $output_file, $tee_file, $line_separator, $rOpts, $rpending_logfile_message, $binmode ); #--------------------------------------------------------------- # initialize the error logger #--------------------------------------------------------------- my $warning_file = $fileroot . $dot . "ERR"; if ($errorfile_stream) { $warning_file = $errorfile_stream } my $log_file = $fileroot . $dot . 
"LOG"; if ($logfile_stream) { $log_file = $logfile_stream } my $logger_object = Perl::Tidy::Logger->new( $rOpts, $log_file, $warning_file, $saw_extrude ); write_logfile_header( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type, $readable_options, ); if ($$rpending_logfile_message) { $logger_object->write_logfile_entry($$rpending_logfile_message); } if ($$rpending_complaint) { $logger_object->complain($$rpending_complaint); } #--------------------------------------------------------------- # initialize the debug object, if any #--------------------------------------------------------------- my $debugger_object = undef; if ( $rOpts->{DEBUG} ) { $debugger_object = Perl::Tidy::Debugger->new( $fileroot . $dot . "DEBUG" ); } #--------------------------------------------------------------- # create a formatter for this file : html writer or pretty printer #--------------------------------------------------------------- # we have to delete any old formatter because, for safety, # the formatter will check to see that there is only one. 
$formatter = undef; if ($user_formatter) { $formatter = $user_formatter; } elsif ( $rOpts->{'format'} eq 'html' ) { $formatter = Perl::Tidy::HtmlWriter->new( $fileroot, $output_file, $actual_output_extension, $html_toc_extension, $html_src_extension ); } elsif ( $rOpts->{'format'} eq 'tidy' ) { $formatter = Perl::Tidy::Formatter->new( logger_object => $logger_object, diagnostics_object => $diagnostics_object, sink_object => $sink_object, ); } else { die "I don't know how to do -format=$rOpts->{'format'}\n"; } unless ($formatter) { die "Unable to continue with $rOpts->{'format'} formatting\n"; } #--------------------------------------------------------------- # create the tokenizer for this file #--------------------------------------------------------------- $tokenizer = undef; # must destroy old tokenizer $tokenizer = Perl::Tidy::Tokenizer->new( source_object => $source_object, logger_object => $logger_object, debugger_object => $debugger_object, diagnostics_object => $diagnostics_object, starting_level => $rOpts->{'starting-indentation-level'}, tabs => $rOpts->{'tabs'}, indent_columns => $rOpts->{'indent-columns'}, look_for_hash_bang => $rOpts->{'look-for-hash-bang'}, look_for_autoloader => $rOpts->{'look-for-autoloader'}, look_for_selfloader => $rOpts->{'look-for-selfloader'}, trim_qw => $rOpts->{'trim-qw'}, ); #--------------------------------------------------------------- # now we can do it #--------------------------------------------------------------- process_this_file( $tokenizer, $formatter ); #--------------------------------------------------------------- # close the input source and report errors #--------------------------------------------------------------- $source_object->close_input_file(); # get file names to use for syntax check my $ifname = $source_object->get_input_file_copy_name(); my $ofname = $sink_object->get_output_file_copy(); #--------------------------------------------------------------- # handle the -b option (backup and modify 
in-place) #--------------------------------------------------------------- if ($in_place_modify) { unless ( -f $input_file ) { # oh, oh, no real file to backup .. # shouldn't happen because of numerous preliminary checks die print "problem with -b backing up input file '$input_file': not a file\n"; } my $backup_name = $input_file . $backup_extension; if ( -f $backup_name ) { unlink($backup_name) or die "unable to remove previous '$backup_name' for -b option; check permissions: $!\n"; } rename( $input_file, $backup_name ) or die "problem renaming $input_file to $backup_name for -b option: $!\n"; $ifname = $backup_name; seek( $output_file, 0, 0 ) or die "unable to rewind tmp file for -b option: $!\n"; my $fout = IO::File->new("> $input_file") or die "problem opening $input_file for write for -b option; check directory permissions: $!\n"; binmode $fout; my $line; while ( $line = $output_file->getline() ) { $fout->print($line); } $fout->close(); $output_file = $input_file; $ofname = $input_file; } #--------------------------------------------------------------- # clean up and report errors #--------------------------------------------------------------- $sink_object->close_output_file() if $sink_object; $debugger_object->close_debug_file() if $debugger_object; my $infile_syntax_ok = 0; # -1 no 0=don't know 1 yes if ($output_file) { if ($input_file_permissions) { # give output script same permissions as input script, but # make it user-writable or else we can't run perltidy again. # Thus we retain whatever executable flags were set. 
if ( $rOpts->{'format'} eq 'tidy' ) { chmod( $input_file_permissions | 0600, $output_file ); } # else use default permissions for html and any other format } if ( $logger_object && $rOpts->{'check-syntax'} ) { $infile_syntax_ok = check_syntax( $ifname, $ofname, $logger_object, $rOpts ); } } $logger_object->finish( $infile_syntax_ok, $formatter ) if $logger_object; } # end of loop to process all files } # end of main program } sub fileglob_to_re { # modified (corrected) from version in find2perl my $x = shift; $x =~ s#([./^\$()])#\\$1#g; # escape special characters $x =~ s#\*#.*#g; # '*' -> '.*' $x =~ s#\?#.#g; # '?' -> '.' "^$x\\z"; # match whole word } sub make_extension { # Make a file extension, including any leading '.' if necessary # The '.' may actually be an '_' under VMS my ( $extension, $default, $dot ) = @_; # Use the default if none specified $extension = $default unless ($extension); # Only extensions with these leading characters get a '.' # This rule gives the user some freedom if ( $extension =~ /^[a-zA-Z0-9]/ ) { $extension = $dot . 
$extension; } return $extension; } sub write_logfile_header { my ( $rOpts, $logger_object, $config_file, $rraw_options, $Windows_type, $readable_options ) = @_; $logger_object->write_logfile_entry( "perltidy version $VERSION log file on a $^O system, OLD_PERL_VERSION=$]\n" ); if ($Windows_type) { $logger_object->write_logfile_entry("Windows type is $Windows_type\n"); } my $options_string = join( ' ', @$rraw_options ); if ($config_file) { $logger_object->write_logfile_entry( "Found Configuration File >>> $config_file \n"); } $logger_object->write_logfile_entry( "Configuration and command line parameters for this run:\n"); $logger_object->write_logfile_entry("$options_string\n"); if ( $rOpts->{'DEBUG'} || $rOpts->{'show-options'} ) { $rOpts->{'logfile'} = 1; # force logfile to be saved $logger_object->write_logfile_entry( "Final parameter set for this run\n"); $logger_object->write_logfile_entry( "------------------------------------\n"); $logger_object->write_logfile_entry($readable_options); $logger_object->write_logfile_entry( "------------------------------------\n"); } $logger_object->write_logfile_entry( "To find error messages search for 'WARNING' with your editor\n"); } sub generate_options { ###################################################################### # Generate and return references to: # @option_string - the list of options to be passed to Getopt::Long # @defaults - the list of default options # %expansion - a hash showing how all abbreviations are expanded # %category - a hash giving the general category of each option # %option_range - a hash giving the valid ranges of certain options # Note: a few options are not documented in the man page and usage # message. This is because these are experimental or debug options and # may or may not be retained in future versions. # # Here are the undocumented flags as far as I know. Any of them # may disappear at any time. They are mainly for fine-tuning # and debugging. 
# # fll --> fuzzy-line-length # a trivial parameter which gets # turned off for the extrude option # which is mainly for debugging # chk --> check-multiline-quotes # check for old bug; to be deleted # scl --> short-concatenation-item-length # helps break at '.' # recombine # for debugging line breaks # valign # for debugging vertical alignment # I --> DIAGNOSTICS # for debugging ###################################################################### # here is a summary of the Getopt codes: # does not take an argument # =s takes a mandatory string # :s takes an optional string (DO NOT USE - filenames will get eaten up) # =i takes a mandatory integer # :i takes an optional integer (NOT RECOMMENDED - can cause trouble) # ! does not take an argument and may be negated # i.e., -foo and -nofoo are allowed # a double dash signals the end of the options list # #--------------------------------------------------------------- # Define the option string passed to GetOptions. #--------------------------------------------------------------- my @option_string = (); my %expansion = (); my %option_category = (); my %option_range = (); my $rexpansion = \%expansion; # names of categories in manual # leading integers will allow sorting my @category_name = ( '0. I/O control', '1. Basic formatting options', '2. Code indentation control', '3. Whitespace control', '4. Comment controls', '5. Linebreak controls', '6. Controlling list formatting', '7. Retaining or ignoring existing line breaks', '8. Blank line control', '9. Other controls', '10. HTML options', '11. pod2html options', '12. Controlling HTML properties', '13. Debugging', ); # These options are parsed directly by perltidy: # help h # version v # However, they are included in the option set so that they will # be seen in the options dump. # These long option names have no abbreviations or are treated specially @option_string = qw( html! noprofile no-profile npro recombine! valign! 
); my $category = 13; # Debugging foreach (@option_string) { my $opt = $_; # must avoid changing the actual flag $opt =~ s/!$//; $option_category{$opt} = $category_name[$category]; } $category = 11; # HTML $option_category{html} = $category_name[$category]; # routine to install and check options my $add_option = sub { my ( $long_name, $short_name, $flag ) = @_; push @option_string, $long_name . $flag; $option_category{$long_name} = $category_name[$category]; if ($short_name) { if ( $expansion{$short_name} ) { my $existing_name = $expansion{$short_name}[0]; die "redefining abbreviation $short_name for $long_name; already used for $existing_name\n"; } $expansion{$short_name} = [$long_name]; if ( $flag eq '!' ) { my $nshort_name = 'n' . $short_name; my $nolong_name = 'no' . $long_name; if ( $expansion{$nshort_name} ) { my $existing_name = $expansion{$nshort_name}[0]; die "attempting to redefine abbreviation $nshort_name for $nolong_name; already used for $existing_name\n"; } $expansion{$nshort_name} = [$nolong_name]; } } }; # Install long option names which have a simple abbreviation. # Options with code '!' get standard negation ('no' for long names, # 'n' for abbreviations). Categories follow the manual. ########################### $category = 0; # I/O_Control ########################### $add_option->( 'backup-and-modify-in-place', 'b', '!' ); $add_option->( 'backup-file-extension', 'bext', '=s' ); $add_option->( 'force-read-binary', 'f', '!' ); $add_option->( 'format', 'fmt', '=s' ); $add_option->( 'logfile', 'log', '!' ); $add_option->( 'logfile-gap', 'g', ':i' ); $add_option->( 'outfile', 'o', '=s' ); $add_option->( 'output-file-extension', 'oext', '=s' ); $add_option->( 'output-path', 'opath', '=s' ); $add_option->( 'profile', 'pro', '=s' ); $add_option->( 'quiet', 'q', '!' ); $add_option->( 'standard-error-output', 'se', '!' ); $add_option->( 'standard-output', 'st', '!' ); $add_option->( 'warning-output', 'w', '!' 
); # options which are both toggle switches and values moved here # to hide from tidyview (which does not show category 0 flags): # -ole moved here from category 1 # -sil moved here from category 2 $add_option->( 'output-line-ending', 'ole', '=s' ); $add_option->( 'starting-indentation-level', 'sil', '=i' ); ######################################## $category = 1; # Basic formatting options ######################################## $add_option->( 'check-syntax', 'syn', '!' ); $add_option->( 'entab-leading-whitespace', 'et', '=i' ); $add_option->( 'indent-columns', 'i', '=i' ); $add_option->( 'maximum-line-length', 'l', '=i' ); $add_option->( 'perl-syntax-check-flags', 'pscf', '=s' ); $add_option->( 'preserve-line-endings', 'ple', '!' ); $add_option->( 'tabs', 't', '!' ); ######################################## $category = 2; # Code indentation control ######################################## $add_option->( 'continuation-indentation', 'ci', '=i' ); $add_option->( 'line-up-parentheses', 'lp', '!' ); $add_option->( 'outdent-keyword-list', 'okwl', '=s' ); $add_option->( 'outdent-keywords', 'okw', '!' ); $add_option->( 'outdent-labels', 'ola', '!' ); $add_option->( 'outdent-long-quotes', 'olq', '!' ); $add_option->( 'indent-closing-brace', 'icb', '!' ); $add_option->( 'closing-token-indentation', 'cti', '=i' ); $add_option->( 'closing-paren-indentation', 'cpi', '=i' ); $add_option->( 'closing-brace-indentation', 'cbi', '=i' ); $add_option->( 'closing-square-bracket-indentation', 'csbi', '=i' ); $add_option->( 'brace-left-and-indent', 'bli', '!' ); $add_option->( 'brace-left-and-indent-list', 'blil', '=s' ); ######################################## $category = 3; # Whitespace control ######################################## $add_option->( 'add-semicolons', 'asc', '!' ); $add_option->( 'add-whitespace', 'aws', '!' ); $add_option->( 'block-brace-tightness', 'bbt', '=i' ); $add_option->( 'brace-tightness', 'bt', '=i' ); $add_option->( 'delete-old-whitespace', 'dws', '!' 
); $add_option->( 'delete-semicolons', 'dsm', '!' ); $add_option->( 'nospace-after-keyword', 'nsak', '=s' ); $add_option->( 'nowant-left-space', 'nwls', '=s' ); $add_option->( 'nowant-right-space', 'nwrs', '=s' ); $add_option->( 'paren-tightness', 'pt', '=i' ); $add_option->( 'space-after-keyword', 'sak', '=s' ); $add_option->( 'space-for-semicolon', 'sfs', '!' ); $add_option->( 'space-function-paren', 'sfp', '!' ); $add_option->( 'space-keyword-paren', 'skp', '!' ); $add_option->( 'space-terminal-semicolon', 'sts', '!' ); $add_option->( 'square-bracket-tightness', 'sbt', '=i' ); $add_option->( 'square-bracket-vertical-tightness', 'sbvt', '=i' ); $add_option->( 'square-bracket-vertical-tightness-closing', 'sbvtc', '=i' ); $add_option->( 'trim-qw', 'tqw', '!' ); $add_option->( 'want-left-space', 'wls', '=s' ); $add_option->( 'want-right-space', 'wrs', '=s' ); ######################################## $category = 4; # Comment controls ######################################## $add_option->( 'closing-side-comment-else-flag', 'csce', '=i' ); $add_option->( 'closing-side-comment-interval', 'csci', '=i' ); $add_option->( 'closing-side-comment-list', 'cscl', '=s' ); $add_option->( 'closing-side-comment-maximum-text', 'csct', '=i' ); $add_option->( 'closing-side-comment-prefix', 'cscp', '=s' ); $add_option->( 'closing-side-comment-warnings', 'cscw', '!' ); $add_option->( 'closing-side-comments', 'csc', '!' ); $add_option->( 'closing-side-comments-balanced', 'cscb', '!' ); $add_option->( 'format-skipping', 'fs', '!' ); $add_option->( 'format-skipping-begin', 'fsb', '=s' ); $add_option->( 'format-skipping-end', 'fse', '=s' ); $add_option->( 'hanging-side-comments', 'hsc', '!' ); $add_option->( 'indent-block-comments', 'ibc', '!' ); $add_option->( 'indent-spaced-block-comments', 'isbc', '!' ); $add_option->( 'fixed-position-side-comment', 'fpsc', '=i' ); $add_option->( 'minimum-space-to-comment', 'msc', '=i' ); $add_option->( 'outdent-long-comments', 'olc', '!' 
); $add_option->( 'outdent-static-block-comments', 'osbc', '!' ); $add_option->( 'static-block-comment-prefix', 'sbcp', '=s' ); $add_option->( 'static-block-comments', 'sbc', '!' ); $add_option->( 'static-side-comment-prefix', 'sscp', '=s' ); $add_option->( 'static-side-comments', 'ssc', '!' ); ######################################## $category = 5; # Linebreak controls ######################################## $add_option->( 'add-newlines', 'anl', '!' ); $add_option->( 'block-brace-vertical-tightness', 'bbvt', '=i' ); $add_option->( 'block-brace-vertical-tightness-list', 'bbvtl', '=s' ); $add_option->( 'brace-vertical-tightness', 'bvt', '=i' ); $add_option->( 'brace-vertical-tightness-closing', 'bvtc', '=i' ); $add_option->( 'cuddled-else', 'ce', '!' ); $add_option->( 'delete-old-newlines', 'dnl', '!' ); $add_option->( 'opening-brace-always-on-right', 'bar', '!' ); $add_option->( 'opening-brace-on-new-line', 'bl', '!' ); $add_option->( 'opening-hash-brace-right', 'ohbr', '!' ); $add_option->( 'opening-paren-right', 'opr', '!' ); $add_option->( 'opening-square-bracket-right', 'osbr', '!' ); $add_option->( 'opening-anonymous-sub-brace-on-new-line', 'asbl', '!' ); $add_option->( 'opening-sub-brace-on-new-line', 'sbl', '!' ); $add_option->( 'paren-vertical-tightness', 'pvt', '=i' ); $add_option->( 'paren-vertical-tightness-closing', 'pvtc', '=i' ); $add_option->( 'stack-closing-hash-brace', 'schb', '!' ); $add_option->( 'stack-closing-paren', 'scp', '!' ); $add_option->( 'stack-closing-square-bracket', 'scsb', '!' ); $add_option->( 'stack-opening-hash-brace', 'sohb', '!' ); $add_option->( 'stack-opening-paren', 'sop', '!' ); $add_option->( 'stack-opening-square-bracket', 'sosb', '!' ); $add_option->( 'vertical-tightness', 'vt', '=i' ); $add_option->( 'vertical-tightness-closing', 'vtc', '=i' ); $add_option->( 'want-break-after', 'wba', '=s' ); $add_option->( 'want-break-before', 'wbb', '=s' ); $add_option->( 'break-after-all-operators', 'baao', '!' 
); $add_option->( 'break-before-all-operators', 'bbao', '!' ); $add_option->( 'keep-interior-semicolons', 'kis', '!' ); ######################################## $category = 6; # Controlling list formatting ######################################## $add_option->( 'break-at-old-comma-breakpoints', 'boc', '!' ); $add_option->( 'comma-arrow-breakpoints', 'cab', '=i' ); $add_option->( 'maximum-fields-per-table', 'mft', '=i' ); ######################################## $category = 7; # Retaining or ignoring existing line breaks ######################################## $add_option->( 'break-at-old-keyword-breakpoints', 'bok', '!' ); $add_option->( 'break-at-old-logical-breakpoints', 'bol', '!' ); $add_option->( 'break-at-old-ternary-breakpoints', 'bot', '!' ); $add_option->( 'ignore-old-breakpoints', 'iob', '!' ); ######################################## $category = 8; # Blank line control ######################################## $add_option->( 'blanks-before-blocks', 'bbb', '!' ); $add_option->( 'blanks-before-comments', 'bbc', '!' ); $add_option->( 'blanks-before-subs', 'bbs', '!' ); $add_option->( 'long-block-line-count', 'lbl', '=i' ); $add_option->( 'maximum-consecutive-blank-lines', 'mbl', '=i' ); $add_option->( 'keep-old-blank-lines', 'kbl', '=i' ); ######################################## $category = 9; # Other controls ######################################## $add_option->( 'delete-block-comments', 'dbc', '!' ); $add_option->( 'delete-closing-side-comments', 'dcsc', '!' ); $add_option->( 'delete-pod', 'dp', '!' ); $add_option->( 'delete-side-comments', 'dsc', '!' ); $add_option->( 'tee-block-comments', 'tbc', '!' ); $add_option->( 'tee-pod', 'tp', '!' ); $add_option->( 'tee-side-comments', 'tsc', '!' ); $add_option->( 'look-for-autoloader', 'lal', '!' ); $add_option->( 'look-for-hash-bang', 'x', '!' ); $add_option->( 'look-for-selfloader', 'lsl', '!' ); $add_option->( 'pass-version-line', 'pvl', '!' 
); ######################################## $category = 13; # Debugging ######################################## $add_option->( 'DEBUG', 'D', '!' ); $add_option->( 'DIAGNOSTICS', 'I', '!' ); $add_option->( 'check-multiline-quotes', 'chk', '!' ); $add_option->( 'dump-defaults', 'ddf', '!' ); $add_option->( 'dump-long-names', 'dln', '!' ); $add_option->( 'dump-options', 'dop', '!' ); $add_option->( 'dump-profile', 'dpro', '!' ); $add_option->( 'dump-short-names', 'dsn', '!' ); $add_option->( 'dump-token-types', 'dtt', '!' ); $add_option->( 'dump-want-left-space', 'dwls', '!' ); $add_option->( 'dump-want-right-space', 'dwrs', '!' ); $add_option->( 'fuzzy-line-length', 'fll', '!' ); $add_option->( 'help', 'h', '' ); $add_option->( 'short-concatenation-item-length', 'scl', '=i' ); $add_option->( 'show-options', 'opt', '!' ); $add_option->( 'version', 'v', '' ); #--------------------------------------------------------------------- # The Perl::Tidy::HtmlWriter will add its own options to the string Perl::Tidy::HtmlWriter->make_getopt_long_names( \@option_string ); ######################################## # Set categories 10, 11, 12 ######################################## # Based on their known order $category = 12; # HTML properties foreach my $opt (@option_string) { my $long_name = $opt; $long_name =~ s/(!|=.*|:.*)$//; unless ( defined( $option_category{$long_name} ) ) { if ( $long_name =~ /^html-linked/ ) { $category = 10; # HTML options } elsif ( $long_name =~ /^pod2html/ ) { $category = 11; # Pod2html } $option_category{$long_name} = $category_name[$category]; } } #--------------------------------------------------------------- # Assign valid ranges to certain options #--------------------------------------------------------------- # In the future, these may be used to make preliminary checks # hash keys are long names # If key or value is undefined: # strings may have any value # integer ranges are >=0 # If value is defined: # value is [qw(any valid words)] for 
strings # value is [min, max] for integers # if min is undefined, there is no lower limit # if max is undefined, there is no upper limit # Parameters not listed here have defaults %option_range = ( 'format' => [ 'tidy', 'html', 'user' ], 'output-line-ending' => [ 'dos', 'win', 'mac', 'unix' ], 'block-brace-tightness' => [ 0, 2 ], 'brace-tightness' => [ 0, 2 ], 'paren-tightness' => [ 0, 2 ], 'square-bracket-tightness' => [ 0, 2 ], 'block-brace-vertical-tightness' => [ 0, 2 ], 'brace-vertical-tightness' => [ 0, 2 ], 'brace-vertical-tightness-closing' => [ 0, 2 ], 'paren-vertical-tightness' => [ 0, 2 ], 'paren-vertical-tightness-closing' => [ 0, 2 ], 'square-bracket-vertical-tightness' => [ 0, 2 ], 'square-bracket-vertical-tightness-closing' => [ 0, 2 ], 'vertical-tightness' => [ 0, 2 ], 'vertical-tightness-closing' => [ 0, 2 ], 'closing-brace-indentation' => [ 0, 3 ], 'closing-paren-indentation' => [ 0, 3 ], 'closing-square-bracket-indentation' => [ 0, 3 ], 'closing-token-indentation' => [ 0, 3 ], 'closing-side-comment-else-flag' => [ 0, 2 ], 'comma-arrow-breakpoints' => [ 0, 3 ], ); # Note: we could actually allow negative ci if someone really wants it: # $option_range{'continuation-indentation'} = [ undef, undef ]; #--------------------------------------------------------------- # Assign default values to the above options here, except # for 'outfile' and 'help'. # These settings should approximate the perlstyle(1) suggestions. 
#--------------------------------------------------------------- my @defaults = qw( add-newlines add-semicolons add-whitespace blanks-before-blocks blanks-before-comments blanks-before-subs block-brace-tightness=0 block-brace-vertical-tightness=0 brace-tightness=1 brace-vertical-tightness-closing=0 brace-vertical-tightness=0 break-at-old-logical-breakpoints break-at-old-ternary-breakpoints break-at-old-keyword-breakpoints comma-arrow-breakpoints=1 nocheck-syntax closing-side-comment-interval=6 closing-side-comment-maximum-text=20 closing-side-comment-else-flag=0 closing-side-comments-balanced closing-paren-indentation=0 closing-brace-indentation=0 closing-square-bracket-indentation=0 continuation-indentation=2 delete-old-newlines delete-semicolons fuzzy-line-length hanging-side-comments indent-block-comments indent-columns=4 keep-old-blank-lines=1 long-block-line-count=8 look-for-autoloader look-for-selfloader maximum-consecutive-blank-lines=1 maximum-fields-per-table=0 maximum-line-length=80 minimum-space-to-comment=4 nobrace-left-and-indent nocuddled-else nodelete-old-whitespace nohtml nologfile noquiet noshow-options nostatic-side-comments notabs nowarning-output outdent-labels outdent-long-quotes outdent-long-comments paren-tightness=1 paren-vertical-tightness-closing=0 paren-vertical-tightness=0 pass-version-line recombine valign short-concatenation-item-length=8 space-for-semicolon square-bracket-tightness=1 square-bracket-vertical-tightness-closing=0 square-bracket-vertical-tightness=0 static-block-comments trim-qw format=tidy backup-file-extension=bak format-skipping pod2html html-table-of-contents html-entities ); push @defaults, "perl-syntax-check-flags=-c -T"; #--------------------------------------------------------------- # Define abbreviations which will be expanded into the above primitives. # These may be defined recursively. 
#--------------------------------------------------------------- %expansion = ( %expansion, 'freeze-newlines' => [qw(noadd-newlines nodelete-old-newlines)], 'fnl' => [qw(freeze-newlines)], 'freeze-whitespace' => [qw(noadd-whitespace nodelete-old-whitespace)], 'fws' => [qw(freeze-whitespace)], 'freeze-blank-lines' => [qw(maximum-consecutive-blank-lines=0 keep-old-blank-lines=2)], 'fbl' => [qw(freeze-blank-lines)], 'indent-only' => [qw(freeze-newlines freeze-whitespace)], 'outdent-long-lines' => [qw(outdent-long-quotes outdent-long-comments)], 'nooutdent-long-lines' => [qw(nooutdent-long-quotes nooutdent-long-comments)], 'noll' => [qw(nooutdent-long-lines)], 'io' => [qw(indent-only)], 'delete-all-comments' => [qw(delete-block-comments delete-side-comments delete-pod)], 'nodelete-all-comments' => [qw(nodelete-block-comments nodelete-side-comments nodelete-pod)], 'dac' => [qw(delete-all-comments)], 'ndac' => [qw(nodelete-all-comments)], 'gnu' => [qw(gnu-style)], 'pbp' => [qw(perl-best-practices)], 'tee-all-comments' => [qw(tee-block-comments tee-side-comments tee-pod)], 'notee-all-comments' => [qw(notee-block-comments notee-side-comments notee-pod)], 'tac' => [qw(tee-all-comments)], 'ntac' => [qw(notee-all-comments)], 'html' => [qw(format=html)], 'nhtml' => [qw(format=tidy)], 'tidy' => [qw(format=tidy)], 'swallow-optional-blank-lines' => [qw(kbl=0)], 'noswallow-optional-blank-lines' => [qw(kbl=1)], 'sob' => [qw(kbl=0)], 'nsob' => [qw(kbl=1)], 'break-after-comma-arrows' => [qw(cab=0)], 'nobreak-after-comma-arrows' => [qw(cab=1)], 'baa' => [qw(cab=0)], 'nbaa' => [qw(cab=1)], 'break-at-old-trinary-breakpoints' => [qw(bot)], 'cti=0' => [qw(cpi=0 cbi=0 csbi=0)], 'cti=1' => [qw(cpi=1 cbi=1 csbi=1)], 'cti=2' => [qw(cpi=2 cbi=2 csbi=2)], 'icp' => [qw(cpi=2 cbi=2 csbi=2)], 'nicp' => [qw(cpi=0 cbi=0 csbi=0)], 'closing-token-indentation=0' => [qw(cpi=0 cbi=0 csbi=0)], 'closing-token-indentation=1' => [qw(cpi=1 cbi=1 csbi=1)], 'closing-token-indentation=2' => [qw(cpi=2 cbi=2 
csbi=2)], 'indent-closing-paren' => [qw(cpi=2 cbi=2 csbi=2)], 'noindent-closing-paren' => [qw(cpi=0 cbi=0 csbi=0)], 'vt=0' => [qw(pvt=0 bvt=0 sbvt=0)], 'vt=1' => [qw(pvt=1 bvt=1 sbvt=1)], 'vt=2' => [qw(pvt=2 bvt=2 sbvt=2)], 'vertical-tightness=0' => [qw(pvt=0 bvt=0 sbvt=0)], 'vertical-tightness=1' => [qw(pvt=1 bvt=1 sbvt=1)], 'vertical-tightness=2' => [qw(pvt=2 bvt=2 sbvt=2)], 'vtc=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], 'vtc=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], 'vtc=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], 'vertical-tightness-closing=0' => [qw(pvtc=0 bvtc=0 sbvtc=0)], 'vertical-tightness-closing=1' => [qw(pvtc=1 bvtc=1 sbvtc=1)], 'vertical-tightness-closing=2' => [qw(pvtc=2 bvtc=2 sbvtc=2)], 'otr' => [qw(opr ohbr osbr)], 'opening-token-right' => [qw(opr ohbr osbr)], 'notr' => [qw(nopr nohbr nosbr)], 'noopening-token-right' => [qw(nopr nohbr nosbr)], 'sot' => [qw(sop sohb sosb)], 'nsot' => [qw(nsop nsohb nsosb)], 'stack-opening-tokens' => [qw(sop sohb sosb)], 'nostack-opening-tokens' => [qw(nsop nsohb nsosb)], 'sct' => [qw(scp schb scsb)], 'stack-closing-tokens' => => [qw(scp schb scsb)], 'nsct' => [qw(nscp nschb nscsb)], 'nostack-opening-tokens' => [qw(nscp nschb nscsb)], # 'mangle' originally deleted pod and comments, but to keep it # reversible, it no longer does. But if you really want to # delete them, just use: # -mangle -dac # An interesting use for 'mangle' is to do this: # perltidy -mangle myfile.pl -st | perltidy -o myfile.pl.new # which will form as many one-line blocks as possible 'mangle' => [ qw( check-syntax keep-old-blank-lines=0 delete-old-newlines delete-old-whitespace delete-semicolons indent-columns=0 maximum-consecutive-blank-lines=0 maximum-line-length=100000 noadd-newlines noadd-semicolons noadd-whitespace noblanks-before-blocks noblanks-before-subs notabs ) ], # 'extrude' originally deleted pod and comments, but to keep it # reversible, it no longer does. 
But if you really want to # delete them, just use # extrude -dac # # An interesting use for 'extrude' is to do this: # perltidy -extrude myfile.pl -st | perltidy -o myfile.pl.new # which will break up all one-line blocks. 'extrude' => [ qw( check-syntax ci=0 delete-old-newlines delete-old-whitespace delete-semicolons indent-columns=0 maximum-consecutive-blank-lines=0 maximum-line-length=1 noadd-semicolons noadd-whitespace noblanks-before-blocks noblanks-before-subs nofuzzy-line-length notabs norecombine ) ], # this style tries to follow the GNU Coding Standards (which do # not really apply to perl but which are followed by some perl # programmers). 'gnu-style' => [ qw( lp bl noll pt=2 bt=2 sbt=2 cpi=1 csbi=1 cbi=1 ) ], # Style suggested in Damian Conway's Perl Best Practices 'perl-best-practices' => [ qw(l=78 i=4 ci=4 st se vt=2 cti=0 pt=1 bt=1 sbt=1 bbt=1 nsfs nolq), q(wbb=% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=) ], # Additional styles can be added here ); Perl::Tidy::HtmlWriter->make_abbreviated_names( \%expansion ); # Uncomment next line to dump all expansions for debugging: # dump_short_names(\%expansion); return ( \@option_string, \@defaults, \%expansion, \%option_category, \%option_range ); } # end of generate_options sub process_command_line { my ( $perltidyrc_stream, $is_Windows, $Windows_type, $rpending_complaint, $dump_options_type ) = @_; use Getopt::Long; my ( $roption_string, $rdefaults, $rexpansion, $roption_category, $roption_range ) = generate_options(); #--------------------------------------------------------------- # set the defaults by passing the above list through GetOptions #--------------------------------------------------------------- my %Opts = (); { local @ARGV; my $i; # do not load the defaults if we are just dumping perltidyrc unless ( $dump_options_type eq 'perltidyrc' ) { for $i (@$rdefaults) { push @ARGV, "--" . 
$i } } # Patch to save users Getopt::Long configuration # and set to Getopt::Long defaults. Use eval to avoid # breaking old versions of Perl without these routines. my $glc; eval { $glc = Getopt::Long::Configure() }; unless ($@) { eval { Getopt::Long::ConfigDefaults() }; } else { $glc = undef } if ( !GetOptions( \%Opts, @$roption_string ) ) { die "Programming Bug: error in setting default options"; } # Patch to put the previous Getopt::Long configuration back eval { Getopt::Long::Configure($glc) } if defined $glc; } my $word; my @raw_options = (); my $config_file = ""; my $saw_ignore_profile = 0; my $saw_extrude = 0; my $saw_dump_profile = 0; my $i; #--------------------------------------------------------------- # Take a first look at the command-line parameters. Do as many # immediate dumps as possible, which can avoid confusion if the # perltidyrc file has an error. #--------------------------------------------------------------- foreach $i (@ARGV) { $i =~ s/^--/-/; if ( $i =~ /^-(npro|noprofile|no-profile)$/ ) { $saw_ignore_profile = 1; } # note: this must come before -pro and -profile, below: elsif ( $i =~ /^-(dump-profile|dpro)$/ ) { $saw_dump_profile = 1; } elsif ( $i =~ /^-(pro|profile)=(.+)/ ) { if ($config_file) { warn "Only one -pro=filename allowed, using '$2' instead of '$config_file'\n"; } $config_file = $2; unless ( -e $config_file ) { warn "cannot find file given with -pro=$config_file: $!\n"; $config_file = ""; } } elsif ( $i =~ /^-(pro|profile)=?$/ ) { die "usage: -pro=filename or --profile=filename, no spaces\n"; } elsif ( $i =~ /^-extrude$/ ) { $saw_extrude = 1; } elsif ( $i =~ /^-(help|h|HELP|H)$/ ) { usage(); exit 1; } elsif ( $i =~ /^-(version|v)$/ ) { show_version(); exit 1; } elsif ( $i =~ /^-(dump-defaults|ddf)$/ ) { dump_defaults(@$rdefaults); exit 1; } elsif ( $i =~ /^-(dump-long-names|dln)$/ ) { dump_long_names(@$roption_string); exit 1; } elsif ( $i =~ /^-(dump-short-names|dsn)$/ ) { dump_short_names($rexpansion); exit 1; } elsif ( $i 
=~ /^-(dump-token-types|dtt)$/ ) { Perl::Tidy::Tokenizer->dump_token_types(*STDOUT); exit 1; } } if ( $saw_dump_profile && $saw_ignore_profile ) { warn "No profile to dump because of -npro\n"; exit 1; } #--------------------------------------------------------------- # read any .perltidyrc configuration file #--------------------------------------------------------------- unless ($saw_ignore_profile) { # resolve possible conflict between $perltidyrc_stream passed # as call parameter to perltidy and -pro=filename on command # line. if ($perltidyrc_stream) { if ($config_file) { warn <{'vertical-tightness'} ) { my $vt = $rOpts->{'vertical-tightness'}; $rOpts->{'paren-vertical-tightness'} = $vt; $rOpts->{'square-bracket-vertical-tightness'} = $vt; $rOpts->{'brace-vertical-tightness'} = $vt; } if ( defined $rOpts->{'vertical-tightness-closing'} ) { my $vtc = $rOpts->{'vertical-tightness-closing'}; $rOpts->{'paren-vertical-tightness-closing'} = $vtc; $rOpts->{'square-bracket-vertical-tightness-closing'} = $vtc; $rOpts->{'brace-vertical-tightness-closing'} = $vtc; } if ( defined $rOpts->{'closing-token-indentation'} ) { my $cti = $rOpts->{'closing-token-indentation'}; $rOpts->{'closing-square-bracket-indentation'} = $cti; $rOpts->{'closing-brace-indentation'} = $cti; $rOpts->{'closing-paren-indentation'} = $cti; } # In quiet mode, there is no log file and hence no way to report # results of syntax check, so don't do it. if ( $rOpts->{'quiet'} ) { $rOpts->{'check-syntax'} = 0; } # can't check syntax if no output if ( $rOpts->{'format'} ne 'tidy' ) { $rOpts->{'check-syntax'} = 0; } # Never let Windows 9x/Me systems run syntax check -- this will prevent a # wide variety of nasty problems on these systems, because they cannot # reliably run backticks. Don't even think about changing this! 
if ( $rOpts->{'check-syntax'} && $is_Windows && ( !$Windows_type || $Windows_type =~ /^(9|Me)/ ) ) { $rOpts->{'check-syntax'} = 0; } # It's really a bad idea to check syntax as root unless you wrote # the script yourself. FIXME: not sure if this works with VMS unless ($is_Windows) { if ( $< == 0 && $rOpts->{'check-syntax'} ) { $rOpts->{'check-syntax'} = 0; $$rpending_complaint .= "Syntax check deactivated for safety; you shouldn't run this as root\n"; } } # see if user set a non-negative logfile-gap if ( defined( $rOpts->{'logfile-gap'} ) && $rOpts->{'logfile-gap'} >= 0 ) { # a zero gap will be taken as a 1 if ( $rOpts->{'logfile-gap'} == 0 ) { $rOpts->{'logfile-gap'} = 1; } # setting a non-negative logfile gap causes logfile to be saved $rOpts->{'logfile'} = 1; } # not setting logfile gap, or setting it negative, causes default of 50 else { $rOpts->{'logfile-gap'} = 50; } # set short-cut flag when only indentation is to be done. # Note that the user may or may not have already set the # indent-only flag. 
if ( !$rOpts->{'add-whitespace'} && !$rOpts->{'delete-old-whitespace'} && !$rOpts->{'add-newlines'} && !$rOpts->{'delete-old-newlines'} ) { $rOpts->{'indent-only'} = 1; } # -isbc implies -ibc if ( $rOpts->{'indent-spaced-block-comments'} ) { $rOpts->{'indent-block-comments'} = 1; } # -bli flag implies -bl if ( $rOpts->{'brace-left-and-indent'} ) { $rOpts->{'opening-brace-on-new-line'} = 1; } if ( $rOpts->{'opening-brace-always-on-right'} && $rOpts->{'opening-brace-on-new-line'} ) { warn <{'opening-brace-on-new-line'} = 0; } # it simplifies things if -bl is 0 rather than undefined if ( !defined( $rOpts->{'opening-brace-on-new-line'} ) ) { $rOpts->{'opening-brace-on-new-line'} = 0; } # -sbl defaults to -bl if not defined if ( !defined( $rOpts->{'opening-sub-brace-on-new-line'} ) ) { $rOpts->{'opening-sub-brace-on-new-line'} = $rOpts->{'opening-brace-on-new-line'}; } if ( $rOpts->{'entab-leading-whitespace'} ) { if ( $rOpts->{'entab-leading-whitespace'} < 0 ) { warn "-et=n must use a positive integer; ignoring -et\n"; $rOpts->{'entab-leading-whitespace'} = undef; } # entab leading whitespace has priority over the older 'tabs' option if ( $rOpts->{'tabs'} ) { $rOpts->{'tabs'} = 0; } } } sub expand_command_abbreviations { # go through @ARGV and expand any abbreviations my ( $rexpansion, $rraw_options, $config_file ) = @_; my ($word); # set a pass limit to prevent an infinite loop; # 10 should be plenty, but it may be increased to allow deeply # nested expansions. my $max_passes = 10; my @new_argv = (); # keep looping until all expansions have been converted into actual # dash parameters.. for ( my $pass_count = 0 ; $pass_count <= $max_passes ; $pass_count++ ) { my @new_argv = (); my $abbrev_count = 0; # loop over each item in @ARGV.. foreach $word (@ARGV) { # convert any leading 'no-' to just 'no' if ( $word =~ /^(-[-]?no)-(.*)/ ) { $word = $1 . $2 } # if it is a dash flag (instead of a file name).. 
if ( $word =~ /^-[-]?([\w\-]+)(.*)/ ) { my $abr = $1; my $flags = $2; # save the raw input for debug output in case of circular refs if ( $pass_count == 0 ) { push( @$rraw_options, $word ); } # recombine abbreviation and flag, if necessary, # to allow abbreviations with arguments such as '-vt=1' if ( $rexpansion->{ $abr . $flags } ) { $abr = $abr . $flags; $flags = ""; } # if we see this dash item in the expansion hash.. if ( $rexpansion->{$abr} ) { $abbrev_count++; # stuff all of the words that it expands to into the # new arg list for the next pass foreach my $abbrev ( @{ $rexpansion->{$abr} } ) { next unless $abbrev; # for safety; shouldn't happen push( @new_argv, '--' . $abbrev . $flags ); } } # not in expansion hash, must be actual long name else { push( @new_argv, $word ); } } # not a dash item, so just save it for the next pass else { push( @new_argv, $word ); } } # end of this pass # update parameter list @ARGV to the new one @ARGV = @new_argv; last unless ( $abbrev_count > 0 ); # make sure we are not in an infinite loop if ( $pass_count == $max_passes ) { print STDERR "I'm tired. We seem to be in an infinite loop trying to expand aliases.\n"; print STDERR "Here are the raw options\n"; local $" = ')('; print STDERR "(@$rraw_options)\n"; my $num = @new_argv; if ( $num < 50 ) { print STDERR "After $max_passes passes here is ARGV\n"; print STDERR "(@new_argv)\n"; } else { print STDERR "After $max_passes passes ARGV has $num entries\n"; } if ($config_file) { die <<"DIE"; Please check your configuration file $config_file for circular-references. To deactivate it, use -npro. DIE } else { die <<'DIE'; Program bug - circular-references in the %expansion hash, probably due to a recent program change. 
DIE } } # end of check for circular references } # end of loop over all passes } # Debug routine -- this will dump the expansion hash sub dump_short_names { my $rexpansion = shift; print STDOUT < @list\n"; } }

sub check_vms_filename {

    # Build a VMS-suitable file name from a valid input file name.
    #
    # Argument: the perltidy input file name.
    # Returns a two-element list:
    #   - the directory part joined with the cleaned-up base name
    #   - the separator to place before an appended extension: "" when
    #     the base name ends in a dot, "_" otherwise
    #
    # Contributed by Michael Cartmell
    #
    my ($input_file) = @_;
    my ( $base, $path ) = fileparse($input_file);

    # remove explicit ; version
    my $removed_semicolon_version = ( $base =~ s/;-?\d*$// );

    # remove explicit . version ie two dots in filename NB ^ escapes a dot
    # (only attempted when no ; version was removed above)
    unless ($removed_semicolon_version) {
        $base =~ s/(              # begin capture $1
                  (?:^|[^^])\.  # match a dot not preceded by a caret
                  (?:           # followed by nothing
                    |           # or
                    .*[^^]      # anything ending in a non caret
                  )
                )               # end capture $1
                \.-?\d*$        # match . version number
              /$1/x;
    }

    # normalise filename, if there are no unescaped dots then append one
    if ( $base !~ /(?:^|[^^])\./ ) {
        $base .= '.';
    }

    # if we don't already have an extension then we just append the
    # extension directly; otherwise an underscore is used as separator
    my $separator = "_";
    if ( $base =~ /\.$/ ) {
        $separator = "";
    }
    return ( $path . $base, $separator );
}

sub Win_OS_Type { # TODO: are these more standard names? # Win32s Win95 Win98 WinMe WinNT3.51 WinNT4 Win2000 WinXP/.Net Win2003 # Returns a string that determines what MS OS we are on. # Returns win32s,95,98,Me,NT3.51,NT4,2000,XP/.Net,Win2003 # Returns blank string if not an MS system. # Original code contributed by: Yves Orton # We need to know this to decide where to look for config files my $rpending_complaint = shift; my $os = ""; return $os unless $^O =~ /win32|dos/i; # is it a MS box?
# Systems built from Perl source may not have Win32.pm # But probably have Win32::GetOSVersion() anyway so the # following line is not 'required': # return $os unless eval('require Win32'); # Use the standard API call to determine the version my ( $undef, $major, $minor, $build, $id ); eval { ( $undef, $major, $minor, $build, $id ) = Win32::GetOSVersion() }; # # NAME ID MAJOR MINOR # Windows NT 4 2 4 0 # Windows 2000 2 5 0 # Windows XP 2 5 1 # Windows Server 2003 2 5 2 return "win32s" unless $id; # If id==0 then its a win32s box. $os = { # Magic numbers from MSDN # documentation of GetOSVersion 1 => { 0 => "95", 10 => "98", 90 => "Me" }, 2 => { 0 => "2000", # or NT 4, see below 1 => "XP/.Net", 2 => "Win2003", 51 => "NT3.51" } }->{$id}->{$minor}; # If $os is undefined, the above code is out of date. Suggested updates # are welcome. unless ( defined $os ) { $os = ""; $$rpending_complaint .= <($config_file); if ($is_Windows) { $config_file = "perltidy.ini"; return $config_file if $exists_config_file->($config_file); } # Default environment vars. my @envs = qw(PERLTIDY HOME); # Check the NT/2k/XP locations, first a local machine def, then a # network def push @envs, qw(USERPROFILE HOMESHARE) if $^O =~ /win32/i; # Now go through the enviornment ... 
foreach my $var (@envs) { $$rconfig_file_chatter .= "# Examining: \$ENV{$var}"; if ( defined( $ENV{$var} ) ) { $$rconfig_file_chatter .= " = $ENV{$var}\n"; # test ENV{ PERLTIDY } as file: if ( $var eq 'PERLTIDY' ) { $config_file = "$ENV{$var}"; return $config_file if $exists_config_file->($config_file); } # test ENV as directory: $config_file = catfile( $ENV{$var}, ".perltidyrc" ); return $config_file if $exists_config_file->($config_file); if ($is_Windows) { $config_file = catfile( $ENV{$var}, "perltidy.ini" ); return $config_file if $exists_config_file->($config_file); } } else { $$rconfig_file_chatter .= "\n"; } } # then look for a system-wide definition # where to look varies with OS if ($is_Windows) { if ($Windows_type) { my ( $os, $system, $allusers ) = Win_Config_Locs( $rpending_complaint, $Windows_type ); # Check All Users directory, if there is one. # i.e. C:\Documents and Settings\User\perltidy.ini if ($allusers) { $config_file = catfile( $allusers, ".perltidyrc" ); return $config_file if $exists_config_file->($config_file); $config_file = catfile( $allusers, "perltidy.ini" ); return $config_file if $exists_config_file->($config_file); } # Check system directory. # retain old code in case someone has been able to create # a file with a leading period. 
$config_file = catfile( $system, ".perltidyrc" ); return $config_file if $exists_config_file->($config_file); $config_file = catfile( $system, "perltidy.ini" ); return $config_file if $exists_config_file->($config_file); } } # Place to add customization code for other systems elsif ( $^O eq 'OS2' ) { } elsif ( $^O eq 'MacOS' ) { } elsif ( $^O eq 'VMS' ) { } # Assume some kind of Unix else { $config_file = "/usr/local/etc/perltidyrc"; return $config_file if $exists_config_file->($config_file); $config_file = "/etc/perltidyrc"; return $config_file if $exists_config_file->($config_file); } # Couldn't find a config file return; }

sub Win_Config_Locs {

    # Map a Windows OS type onto the directories searched for config files.
    #
    # Arguments:
    #   $rpending_complaint - reference to a string; a message is appended
    #                         to it when no locations are known for the OS
    #   $os (optional)      - OS type string; when omitted it is obtained
    #                         from Win_OS_Type()
    #
    # Returns nothing if the OS type is empty (not a win32 OS) or is not
    # one of the recognized types.  Otherwise, in scalar context returns
    # the OS name (95 98 ME NT3.51 NT4 2000 XP); in list context returns
    # OS, System Directory, and All Users Directory.  All Users will be
    # empty on a 9x/Me box.  Contributed by: Yves Orton.
    my $rpending_complaint = shift;

    # Pass the complaint buffer along so that anything Win_OS_Type has to
    # report about an unrecognized Windows version reaches the caller
    # instead of being discarded.
    my $os = (@_) ? shift : Win_OS_Type($rpending_complaint);
    return unless $os;

    my $system   = "";
    my $allusers = "";

    if ( $os =~ /9[58]|Me/ ) {
        $system = "C:/Windows";
    }
    elsif ( $os =~ /NT|XP|200?/ ) {
        $system = ( $os =~ /XP/ ) ? "C:/Windows/" : "C:/WinNT/";
        $allusers =
          ( $os =~ /NT/ )
          ? "C:/WinNT/profiles/All Users/"
          : "C:/Documents and Settings/All Users/";
    }
    else {

        # This currently would only happen on a win32s computer.  I don't
        # have one to test, so I am unsure how to proceed.  Suggestions
        # welcome!
        $$rpending_complaint .=
"I dont know a sensible place to look for config files on an $os system.\n";
        return;
    }
    return wantarray ? ( $os, $system, $allusers ) : $os;
}

sub dump_config_file {

    # Write the config-file search notes to STDOUT, followed by the
    # contents of the config file when a handle for it is available.
    #
    # Arguments:
    #   $fh                   - object providing getline() and close(),
    #                           or a false value if no file was found
    #   $config_file          - file name shown in the dump heading
    #   $rconfig_file_chatter - reference to the accumulated search notes
    my $fh                   = shift;
    my $config_file          = shift;
    my $rconfig_file_chatter = shift;

    print STDOUT "$$rconfig_file_chatter";
    if ($fh) {
        print STDOUT "# Dump of file: '$config_file'\n";

        # Test for definedness explicitly: a method call gets no implicit
        # defined() from 'while', so a last line of "0" without a newline
        # would otherwise end the dump early.
        while ( defined( my $line = $fh->getline() ) ) {
            print STDOUT $line;
        }

        # closing is best effort; a failure here is deliberately ignored
        eval { $fh->close() };
    }
    else {
        print STDOUT "# ...no config file found\n";
    }
}

sub read_config_file { my ( $fh, $config_file, $rexpansion ) = @_; my @config_list = (); # file is bad if non-empty $death_message is returned my $death_message = ""; my $name = undef; my $line_no; while ( my $line = $fh->getline() ) { $line_no++; chomp $line; next if $line =~ /^\s*#/; # skip full-line comment ( $line, $death_message ) = strip_comment( $line, $config_file, $line_no ); last if ($death_message); $line =~ s/^\s*(.*?)\s*$/$1/; # trim both ends next unless $line; # look for something of the general form # newname { body } # or just # body if ( $line =~ /^((\w+)\s*\{)?([^}]*)(\})?$/ ) { my ( $newname, $body, $curly ) = ( $2, $3, $4 ); # handle a new alias definition if ($newname) { if ($name) { $death_message = "No '}' seen after $name and before $newname in config file $config_file line $.\n"; last; } $name = $newname; if ( ${$rexpansion}{$name} ) { local $" = ')('; my @names = sort keys %$rexpansion; $death_message = "Here is a list of all installed aliases\n(@names)\n" . 
"Attempting to redefine alias ($name) in config file $config_file line $.\n"; last; } ${$rexpansion}{$name} = []; } # now do the body if ($body) { my ( $rbody_parts, $msg ) = parse_args($body); if ($msg) { $death_message = <close() }; return ( \@config_list, $death_message ); } sub strip_comment { my ( $instr, $config_file, $line_no ) = @_; my $msg = ""; # nothing to do if no comments if ( $instr !~ /#/ ) { return ( $instr, $msg ); } # use simple method of no quotes elsif ( $instr !~ /['"]/ ) { $instr =~ s/\s*\#.*$//; # simple trim return ( $instr, $msg ); } # handle comments and quotes my $outstr = ""; my $quote_char = ""; while (1) { # looking for ending quote character if ($quote_char) { if ( $instr =~ /\G($quote_char)/gc ) { $quote_char = ""; $outstr .= $1; } elsif ( $instr =~ /\G(.)/gc ) { $outstr .= $1; } # error..we reached the end without seeing the ending quote char else { $msg = < in this text: $instr Please fix this line or use -npro to avoid reading this file EOM last; } } # accumulating characters and looking for start of a quoted string else { if ( $instr =~ /\G([\"\'])/gc ) { $outstr .= $1; $quote_char = $1; } elsif ( $instr =~ /\G#/gc ) { last; } elsif ( $instr =~ /\G(.)/gc ) { $outstr .= $1; } else { last; } } } return ( $outstr, $msg ); } sub parse_args { # Parse a command string containing multiple string with possible # quotes, into individual commands. It might look like this, for example: # # -wba=" + - " -some-thing -wbb='. && ||' # # There is no need, at present, to handle escaped quote characters. # (They are not perltidy tokens, so needn't be in strings). 
my ($body) = @_; my @body_parts = (); my $quote_char = ""; my $part = ""; my $msg = ""; while (1) { # looking for ending quote character if ($quote_char) { if ( $body =~ /\G($quote_char)/gc ) { $quote_char = ""; } elsif ( $body =~ /\G(.)/gc ) { $part .= $1; } # error..we reached the end without seeing the ending quote char else { if ( length($part) ) { push @body_parts, $part; } $msg = < in this text: $body EOM last; } } # accumulating characters and looking for start of a quoted string else { if ( $body =~ /\G([\"\'])/gc ) { $quote_char = $1; } elsif ( $body =~ /\G(\s+)/gc ) { if ( length($part) ) { push @body_parts, $part; } $part = ""; } elsif ( $body =~ /\G(.)/gc ) { $part .= $1; } else { if ( length($part) ) { push @body_parts, $part; } last; } } } return ( \@body_parts, $msg ); } sub dump_long_names { my @names = sort @_; print STDOUT < does not take an argument # =s takes a mandatory string # :s takes an optional string # =i takes a mandatory integer # :i takes an optional integer # ! 
does not take an argument and may be negated # i.e., -foo and -nofoo are allowed # a double dash signals the end of the options list # #--------------------------------------------------------------- EOM foreach (@names) { print STDOUT "$_\n" } } sub dump_defaults { my @defaults = sort @_; print STDOUT "Default command line options:\n"; foreach (@_) { print STDOUT "$_\n" } } sub readable_options { # return options for this run as a string which could be # put in a perltidyrc file my ( $rOpts, $roption_string ) = @_; my %Getopt_flags; my $rGetopt_flags = \%Getopt_flags; my $readable_options = "# Final parameter set for this run.\n"; $readable_options .= "# See utility 'perltidyrc_dump.pl' for nicer formatting.\n"; foreach my $opt ( @{$roption_string} ) { my $flag = ""; if ( $opt =~ /(.*)(!|=.*)$/ ) { $opt = $1; $flag = $2; } if ( defined( $rOpts->{$opt} ) ) { $rGetopt_flags->{$opt} = $flag; } } foreach my $key ( sort keys %{$rOpts} ) { my $flag = $rGetopt_flags->{$key}; my $value = $rOpts->{$key}; my $prefix = '--'; my $suffix = ""; if ($flag) { if ( $flag =~ /^=/ ) { if ( $value !~ /^\d+$/ ) { $value = '"' . $value . '"' } $suffix = "=" . $value; } elsif ( $flag =~ /^!/ ) { $prefix .= "no" unless ($value); } else { # shouldn't happen $readable_options .= "# ERROR in dump_options: unrecognized flag $flag for $key\n"; } } $readable_options .= $prefix . $key . $suffix . "\n"; } return $readable_options; } sub show_version { print <<"EOM"; This is perltidy, v$VERSION Copyright 2000-2009, Steve Hancock Perltidy is free software and may be copied under the terms of the GNU General Public License, which is included in the distribution files. Complete documentation for perltidy can be found using 'man perltidy' or on the internet at http://perltidy.sourceforge.net. EOM } sub usage { print STDOUT <outfile perltidy [ options ] outfile Options have short and long forms. Short forms are shown; see man pages for long forms. 
Note: '=s' indicates a required string, and '=n' indicates a required integer. I/O control -h show this help -o=file name of the output file (only if single input file) -oext=s change output extension from 'tdy' to s -opath=path change path to be 'path' for output files -b backup original to .bak and modify file in-place -bext=s change default backup extension from 'bak' to s -q deactivate error messages (for running under editor) -w include non-critical warning messages in the .ERR error output -syn run perl -c to check syntax (default under unix systems) -log save .LOG file, which has useful diagnostics -f force perltidy to read a binary file -g like -log but writes more detailed .LOG file, for debugging scripts -opt write the set of options actually used to a .LOG file -npro ignore .perltidyrc configuration command file -pro=file read configuration commands from file instead of .perltidyrc -st send output to standard output, STDOUT -se send error output to standard error output, STDERR -v display version number to standard output and quit Basic Options: -i=n use n columns per indentation level (default n=4) -t tabs: use one tab character per indentation level, not recommended -nt no tabs: use n spaces per indentation level (default) -et=n entab leading whitespace n spaces per tab; not recommended -io "indent only": just do indentation, no other formatting.
-sil=n set starting indentation level to n; use if auto detection fails -ole=s specify output line ending (s=dos or win, mac, unix) -ple keep output line endings same as input (input must be filename) Whitespace Control -fws freeze whitespace; this disables all whitespace changes and disables the following switches: -bt=n sets brace tightness, n= (0 = loose, 1=default, 2 = tight) -bbt same as -bt but for code block braces; same as -bt if not given -bbvt block braces vertically tight; use with -bl or -bli -bbvtl=s make -bbvt to apply to selected list of block types -pt=n paren tightness (n=0, 1 or 2) -sbt=n square bracket tightness (n=0, 1, or 2) -bvt=n brace vertical tightness, n=(0=open, 1=close unless multiple steps on a line, 2=always close) -pvt=n paren vertical tightness (see -bvt for n) -sbvt=n square bracket vertical tightness (see -bvt for n) -bvtc=n closing brace vertical tightness: n=(0=open, 1=sometimes close, 2=always close) -pvtc=n closing paren vertical tightness, see -bvtc for n. -sbvtc=n closing square bracket vertical tightness, see -bvtc for n. -ci=n sets continuation indentation=n, default is n=2 spaces -lp line up parentheses, brackets, and non-BLOCK braces -sfs add space before semicolon in for( ; ; ) -aws allow perltidy to add whitespace (default) -dws delete all old non-essential whitespace -icb indent closing brace of a code block -cti=n closing indentation of paren, square bracket, or non-block brace: n=0 none, =1 align with opening, =2 one full indentation level -icp equivalent to -cti=2 -wls=s want space left of tokens in string; i.e. -nwls='+ - * /' -wrs=s want space right of tokens in string; -sts put space before terminal semicolon of a statement -sak=s put space between keywords given in s and '('; -nsak=s no space between keywords in s and '('; i.e. 
-nsak='my our local' Line Break Control -fnl freeze newlines; this disables all line break changes and disables the following switches: -anl add newlines; ok to introduce new line breaks -bbs add blank line before subs and packages -bbc add blank line before block comments -bbb add blank line between major blocks -kbl=n keep old blank lines? 0=no, 1=some, 2=all -mbl=n maximum consecutive blank lines to output (default=1) -ce cuddled else; use this style: '} else {' -dnl delete old newlines (default) -l=n maximum line length; default n=80 -bl opening brace on new line -sbl opening sub brace on new line. value of -bl is used if not given. -bli opening brace on new line and indented -bar opening brace always on right, even for long clauses -vt=n vertical tightness (requires -lp); n controls break after opening token: 0=never 1=no break if next line balanced 2=no break -vtc=n vertical tightness of closing container; n controls if closing token starts new line: 0=always 1=not unless list 2=never -wba=s want break after tokens in string; i.e. wba=': .' -wbb=s want break before tokens in string Following Old Breakpoints -kis keep interior semicolons. Allows multiple statements per line.
-boc break at old comma breaks: turns off all automatic list formatting -bol break at old logical breakpoints: or, and, ||, && (default) -bok break at old list keyword breakpoints such as map, sort (default) -bot break at old conditional (ternary ?:) operator breakpoints (default) -cab=n break at commas after a comma-arrow (=>): n=0 break at all commas after => n=1 stable: break unless this breaks an existing one-line container n=2 break only if a one-line container cannot be formed n=3 do not treat commas after => specially at all Comment controls -ibc indent block comments (default) -isbc indent spaced block comments; may indent unless no leading space -msc=n minimum desired spaces to side comment, default 4 -fpsc=n fix position for side comments; default 0; -csc add or update closing side comments after closing BLOCK brace -dcsc delete closing side comments created by a -csc command -cscp=s change closing side comment prefix to be other than '## end' -cscl=s change closing side comment to apply to selected list of blocks -csci=n minimum number of lines needed to apply a -csc tag, default n=6 -csct=n maximum number of columns of appended text, default n=20 -cscw causes warning if old side comment is overwritten with -csc -sbc use 'static block comments' identified by leading '##' (default) -sbcp=s change static block comment identifier to be other than '##' -osbc outdent static block comments -ssc use 'static side comments' identified by leading '##' (default) -sscp=s change static side comment identifier to be other than '##' Delete selected text -dac delete all comments AND pod -dbc delete block comments -dsc delete side comments -dp delete pod Send selected text to a '.TEE' file -tac tee all comments AND pod -tbc tee block comments -tsc tee side comments -tp tee pod Outdenting -olq outdent long quoted strings (default) -olc outdent a long block comment line -ola outdent statement labels -okw outdent control keywords (redo, next, last, goto, return) -okwl=s 
specify alternative keywords for -okw command Other controls -mft=n maximum fields per table; default n=40 -x do not format lines before hash-bang line (i.e., for VMS) -asc allows perltidy to add a ';' when missing (default) -dsm allows perltidy to delete an unnecessary ';' (default) Combinations of other parameters -gnu attempt to follow GNU Coding Standards as applied to perl -mangle remove as many newlines as possible (but keep comments and pods) -extrude insert as many newlines as possible Dump and die, debugging -dop dump options used in this run to standard output and quit -ddf dump default options to standard output and quit -dsn dump all option short names to standard output and quit -dln dump option long names to standard output and quit -dpro dump whatever configuration file is in effect to standard output -dtt dump all token types to standard output and quit HTML -html write an html file (see 'man perl2web' for many options) Note: when -html is used, no indentation or formatting are done. Hint: try perltidy -html -css=mystyle.css filename.pl and edit mystyle.css to change the appearance of filename.html. -nnn gives line numbers -pre only writes out
<pre>..</pre>
code section -toc places a table of contents to subs at the top (default) -pod passes pod text through pod2html (default) -frm write html as a frame (3 files) -text=s extra extension for table of contents if -frm, default='toc' -sext=s extra extension for file content if -frm, default='src' A prefix of "n" negates short form toggle switches, and a prefix of "no" negates the long forms. For example, -nasc means don't add missing semicolons. If you are unable to see this entire text, try "perltidy -h | more" For more detailed information, and additional options, try "man perltidy", or go to the perltidy home page at http://perltidy.sourceforge.net EOF } sub process_this_file { my ( $truth, $beauty ) = @_; # loop to process each line of this file while ( my $line_of_tokens = $truth->get_line() ) { $beauty->write_line($line_of_tokens); } # finish up eval { $beauty->finish_formatting() }; $truth->report_tokenization_errors(); } sub check_syntax { # Use 'perl -c' to make sure that we did not create bad syntax # This is a very good independent check for programming errors # # Given names of the input and output files, ($ifname, $ofname), # we do the following: # - check syntax of the input file # - if bad, all done (could be an incomplete code snippet) # - if infile syntax ok, then check syntax of the output file; # - if outfile syntax bad, issue warning; this implies a code bug! # - set and return flag "infile_syntax_ok" : =-1 bad 0 unknown 1 good my ( $ifname, $ofname, $logger_object, $rOpts ) = @_; my $infile_syntax_ok = 0; my $line_of_dashes = '-' x 42 . "\n"; my $flags = $rOpts->{'perl-syntax-check-flags'}; # be sure we invoke perl with -c # note: perl will accept repeated flags like '-c -c'. It is safest # to append another -c than try to find an interior bundled c, as # in -Tc, because such a 'c' might be in a quoted string, for example. 
if ( $flags !~ /(^-c|\s+-c)/ ) { $flags .= " -c" } # be sure we invoke perl with -x if requested # same comments about repeated parameters applies if ( $rOpts->{'look-for-hash-bang'} ) { if ( $flags !~ /(^-x|\s+-x)/ ) { $flags .= " -x" } } # this shouldn't happen unless a termporary file couldn't be made if ( $ifname eq '-' ) { $logger_object->write_logfile_entry( "Cannot run perl -c on STDIN and STDOUT\n"); return $infile_syntax_ok; } $logger_object->write_logfile_entry( "checking input file syntax with perl $flags\n"); $logger_object->write_logfile_entry($line_of_dashes); # Not all operating systems/shells support redirection of the standard # error output. my $error_redirection = ( $^O eq 'VMS' ) ? "" : '2>&1'; my $perl_output = do_syntax_check( $ifname, $flags, $error_redirection ); $logger_object->write_logfile_entry("$perl_output\n"); if ( $perl_output =~ /syntax\s*OK/ ) { $infile_syntax_ok = 1; $logger_object->write_logfile_entry($line_of_dashes); $logger_object->write_logfile_entry( "checking output file syntax with perl $flags ...\n"); $logger_object->write_logfile_entry($line_of_dashes); my $perl_output = do_syntax_check( $ofname, $flags, $error_redirection ); $logger_object->write_logfile_entry("$perl_output\n"); unless ( $perl_output =~ /syntax\s*OK/ ) { $logger_object->write_logfile_entry($line_of_dashes); $logger_object->warning( "The output file has a syntax error when tested with perl $flags $ofname !\n" ); $logger_object->warning( "This implies an error in perltidy; the file $ofname is bad\n"); $logger_object->report_definite_bug(); # the perl version number will be helpful for diagnosing the problem $logger_object->write_logfile_entry( qx/perl -v $error_redirection/ . "\n" ); } } else { # Only warn of perl -c syntax errors. Other messages, # such as missing modules, are too common. 
They can be # seen by running with perltidy -w $logger_object->complain("A syntax check using perl $flags gives: \n"); $logger_object->complain($line_of_dashes); $logger_object->complain("$perl_output\n"); $logger_object->complain($line_of_dashes); $infile_syntax_ok = -1; $logger_object->write_logfile_entry($line_of_dashes); $logger_object->write_logfile_entry( "The output file will not be checked because of input file problems\n" ); } return $infile_syntax_ok; } sub do_syntax_check { my ( $fname, $flags, $error_redirection ) = @_; # We have to quote the filename in case it has unusual characters # or spaces. Example: this filename #CM11.pm# gives trouble. $fname = '"' . $fname . '"'; # Under VMS something like -T will become -t (and an error) so we # will put quotes around the flags. Double quotes seem to work on # Unix/Windows/VMS, but this may not work on all systems. (Single # quotes do not work under Windows). It could become necessary to # put double quotes around each flag, such as: -"c" -"T" # We may eventually need some system-dependent coding here. $flags = '"' . $flags . '"'; # now wish for luck... 
return qx/perl $flags $fname $error_redirection/; } ##################################################################### # # This is a stripped down version of IO::Scalar # Given a reference to a scalar, it supplies either: # a getline method which reads lines (mode='r'), or # a print method which reads lines (mode='w') # ##################################################################### package Perl::Tidy::IOScalar; use Carp; sub new { my ( $package, $rscalar, $mode ) = @_; my $ref = ref $rscalar; if ( $ref ne 'SCALAR' ) { confess <[1]; if ( $mode ne 'r' ) { confess <[2]++; ##my $line = $self->[0]->[$i]; return $self->[0]->[$i]; } sub print { my $self = shift; my $mode = $self->[1]; if ( $mode ne 'w' ) { confess <[0] } .= $_[0]; } sub close { return } ##################################################################### # # This is a stripped down version of IO::ScalarArray # Given a reference to an array, it supplies either: # a getline method which reads lines (mode='r'), or # a print method which reads lines (mode='w') # # NOTE: this routine assumes that that there aren't any embedded # newlines within any of the array elements. There are no checks # for that. 
# ##################################################################### package Perl::Tidy::IOScalarArray; use Carp; sub new { my ( $package, $rarray, $mode ) = @_; my $ref = ref $rarray; if ( $ref ne 'ARRAY' ) { confess <[1]; if ( $mode ne 'r' ) { confess <[2]++; return $self->[0]->[$i]; } sub print { my $self = shift; my $mode = $self->[1]; if ( $mode ne 'w' ) { confess <[0] }, $_[0]; } sub close { return } ##################################################################### # # the Perl::Tidy::LineSource class supplies an object with a 'get_line()' method # which returns the next line to be parsed # ##################################################################### package Perl::Tidy::LineSource; sub new { my ( $class, $input_file, $rOpts, $rpending_logfile_message ) = @_; my $input_file_copy = undef; my $fh_copy; my $input_line_ending; if ( $rOpts->{'preserve-line-endings'} ) { $input_line_ending = Perl::Tidy::find_input_line_ending($input_file); } ( my $fh, $input_file ) = Perl::Tidy::streamhandle( $input_file, 'r' ); return undef unless $fh; # in order to check output syntax when standard output is used, # or when it is an object, we have to make a copy of the file if ( ( $input_file eq '-' || ref $input_file ) && $rOpts->{'check-syntax'} ) { # Turning off syntax check when input output is used. # The reason is that temporary files cause problems on # on many systems. 
$rOpts->{'check-syntax'} = 0; $input_file_copy = '-'; $$rpending_logfile_message .= < $fh, _fh_copy => $fh_copy, _filename => $input_file, _input_file_copy => $input_file_copy, _input_line_ending => $input_line_ending, _rinput_buffer => [], _started => 0, }, $class; } sub get_input_file_copy_name { my $self = shift; my $ifname = $self->{_input_file_copy}; unless ($ifname) { $ifname = $self->{_filename}; } return $ifname; } sub close_input_file { my $self = shift; eval { $self->{_fh}->close() }; eval { $self->{_fh_copy}->close() } if $self->{_fh_copy}; } sub get_line { my $self = shift; my $line = undef; my $fh = $self->{_fh}; my $fh_copy = $self->{_fh_copy}; my $rinput_buffer = $self->{_rinput_buffer}; if ( scalar(@$rinput_buffer) ) { $line = shift @$rinput_buffer; } else { $line = $fh->getline(); # patch to read raw mac files under unix, dos # see if the first line has embedded \r's if ( $line && !$self->{_started} ) { if ( $line =~ /[\015][^\015\012]/ ) { # found one -- break the line up and store in a buffer @$rinput_buffer = map { $_ . 
"\n" } split /\015/, $line; my $count = @$rinput_buffer; $line = shift @$rinput_buffer; } $self->{_started}++; } } if ( $line && $fh_copy ) { $fh_copy->print($line); } return $line; } ##################################################################### # # the Perl::Tidy::LineSink class supplies a write_line method for # actual file writing # ##################################################################### package Perl::Tidy::LineSink; sub new { my ( $class, $output_file, $tee_file, $line_separator, $rOpts, $rpending_logfile_message, $binmode ) = @_; my $fh = undef; my $fh_copy = undef; my $fh_tee = undef; my $output_file_copy = ""; my $output_file_open = 0; if ( $rOpts->{'format'} eq 'tidy' ) { ( $fh, $output_file ) = Perl::Tidy::streamhandle( $output_file, 'w' ); unless ($fh) { die "Cannot write to output stream\n"; } $output_file_open = 1; if ($binmode) { if ( ref($fh) eq 'IO::File' ) { binmode $fh; } if ( $output_file eq '-' ) { binmode STDOUT } } } # in order to check output syntax when standard output is used, # or when it is an object, we have to make a copy of the file if ( $output_file eq '-' || ref $output_file ) { if ( $rOpts->{'check-syntax'} ) { # Turning off syntax check when standard output is used. # The reason is that temporary files cause problems on # on many systems. 
$rOpts->{'check-syntax'} = 0; $output_file_copy = '-'; $$rpending_logfile_message .= < $fh, _fh_copy => $fh_copy, _fh_tee => $fh_tee, _output_file => $output_file, _output_file_open => $output_file_open, _output_file_copy => $output_file_copy, _tee_flag => 0, _tee_file => $tee_file, _tee_file_opened => 0, _line_separator => $line_separator, _binmode => $binmode, }, $class; } sub write_line { my $self = shift; my $fh = $self->{_fh}; my $fh_copy = $self->{_fh_copy}; my $output_file_open = $self->{_output_file_open}; chomp $_[0]; $_[0] .= $self->{_line_separator}; $fh->print( $_[0] ) if ( $self->{_output_file_open} ); print $fh_copy $_[0] if ( $fh_copy && $self->{_output_file_copy} ); if ( $self->{_tee_flag} ) { unless ( $self->{_tee_file_opened} ) { $self->really_open_tee_file() } my $fh_tee = $self->{_fh_tee}; print $fh_tee $_[0]; } } sub get_output_file_copy { my $self = shift; my $ofname = $self->{_output_file_copy}; unless ($ofname) { $ofname = $self->{_output_file}; } return $ofname; } sub tee_on { my $self = shift; $self->{_tee_flag} = 1; } sub tee_off { my $self = shift; $self->{_tee_flag} = 0; } sub really_open_tee_file { my $self = shift; my $tee_file = $self->{_tee_file}; my $fh_tee; $fh_tee = IO::File->new(">$tee_file") or die("couldn't open TEE file $tee_file: $!\n"); binmode $fh_tee if $self->{_binmode}; $self->{_tee_file_opened} = 1; $self->{_fh_tee} = $fh_tee; } sub close_output_file { my $self = shift; eval { $self->{_fh}->close() } if $self->{_output_file_open}; eval { $self->{_fh_copy}->close() } if ( $self->{_output_file_copy} ); $self->close_tee_file(); } sub close_tee_file { my $self = shift; if ( $self->{_tee_file_opened} ) { eval { $self->{_fh_tee}->close() }; $self->{_tee_file_opened} = 0; } } ##################################################################### # # The Perl::Tidy::Diagnostics class writes the DIAGNOSTICS file, which is # useful for program development. 
# # Only one such file is created regardless of the number of input # files processed. This allows the results of processing many files # to be summarized in a single file. # ##################################################################### package Perl::Tidy::Diagnostics; sub new { my $class = shift; bless { _write_diagnostics_count => 0, _last_diagnostic_file => "", _input_file => "", _fh => undef, }, $class; } sub set_input_file { my $self = shift; $self->{_input_file} = $_[0]; } # This is a diagnostic routine which is useful for program development. # Output from debug messages go to a file named DIAGNOSTICS, where # they are labeled by file and line. This allows many files to be # scanned at once for some particular condition of interest. sub write_diagnostics { my $self = shift; unless ( $self->{_write_diagnostics_count} ) { open DIAGNOSTICS, ">DIAGNOSTICS" or death("couldn't open DIAGNOSTICS: $!\n"); } my $last_diagnostic_file = $self->{_last_diagnostic_file}; my $input_file = $self->{_input_file}; if ( $last_diagnostic_file ne $input_file ) { print DIAGNOSTICS "\nFILE:$input_file\n"; } $self->{_last_diagnostic_file} = $input_file; my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); print DIAGNOSTICS "$input_line_number:\t@_"; $self->{_write_diagnostics_count}++; } ##################################################################### # # The Perl::Tidy::Logger class writes the .LOG and .ERR files # ##################################################################### package Perl::Tidy::Logger; sub new { my $class = shift; my $fh; my ( $rOpts, $log_file, $warning_file, $saw_extrude ) = @_; # remove any old error output file unless ( ref($warning_file) ) { if ( -e $warning_file ) { unlink($warning_file) } } bless { _log_file => $log_file, _fh_warnings => undef, _rOpts => $rOpts, _fh_warnings => undef, _last_input_line_written => 0, _at_end_of_file => 0, _use_prefix => 1, _block_log_output => 0, _line_of_tokens => undef, 
_output_line_number => undef, _wrote_line_information_string => 0, _wrote_column_headings => 0, _warning_file => $warning_file, _warning_count => 0, _complaint_count => 0, _saw_code_bug => -1, # -1=no 0=maybe 1=for sure _saw_brace_error => 0, _saw_extrude => $saw_extrude, _output_array => [], }, $class; } sub close_log_file { my $self = shift; if ( $self->{_fh_warnings} ) { eval { $self->{_fh_warnings}->close() }; $self->{_fh_warnings} = undef; } } sub get_warning_count { my $self = shift; return $self->{_warning_count}; } sub get_use_prefix { my $self = shift; return $self->{_use_prefix}; } sub block_log_output { my $self = shift; $self->{_block_log_output} = 1; } sub unblock_log_output { my $self = shift; $self->{_block_log_output} = 0; } sub interrupt_logfile { my $self = shift; $self->{_use_prefix} = 0; $self->warning("\n"); $self->write_logfile_entry( '#' x 24 . " WARNING " . '#' x 25 . "\n" ); } sub resume_logfile { my $self = shift; $self->write_logfile_entry( '#' x 60 . "\n" ); $self->{_use_prefix} = 1; } sub we_are_at_the_last_line { my $self = shift; unless ( $self->{_wrote_line_information_string} ) { $self->write_logfile_entry("Last line\n\n"); } $self->{_at_end_of_file} = 1; } # record some stuff in case we go down in flames sub black_box { my $self = shift; my ( $line_of_tokens, $output_line_number ) = @_; my $input_line = $line_of_tokens->{_line_text}; my $input_line_number = $line_of_tokens->{_line_number}; # save line information in case we have to write a logfile message $self->{_line_of_tokens} = $line_of_tokens; $self->{_output_line_number} = $output_line_number; $self->{_wrote_line_information_string} = 0; my $last_input_line_written = $self->{_last_input_line_written}; my $rOpts = $self->{_rOpts}; if ( ( ( $input_line_number - $last_input_line_written ) >= $rOpts->{'logfile-gap'} ) || ( $input_line =~ /^\s*(sub|package)\s+(\w+)/ ) ) { my $rlevels = $line_of_tokens->{_rlevels}; my $structural_indentation_level = $$rlevels[0]; 
$self->{_last_input_line_written} = $input_line_number; ( my $out_str = $input_line ) =~ s/^\s*//; chomp $out_str; $out_str = ( '.' x $structural_indentation_level ) . $out_str; if ( length($out_str) > 35 ) { $out_str = substr( $out_str, 0, 35 ) . " ...."; } $self->logfile_output( "", "$out_str\n" ); } } sub write_logfile_entry { my $self = shift; # add leading >>> to avoid confusing error mesages and code $self->logfile_output( ">>>", "@_" ); } sub write_column_headings { my $self = shift; $self->{_wrote_column_headings} = 1; my $routput_array = $self->{_output_array}; push @{$routput_array}, <>>) lines levels i k (code begins with one '.' per indent level) ------ ----- - - -------- ------------------------------------------- EOM } sub make_line_information_string { # make columns of information when a logfile message needs to go out my $self = shift; my $line_of_tokens = $self->{_line_of_tokens}; my $input_line_number = $line_of_tokens->{_line_number}; my $line_information_string = ""; if ($input_line_number) { my $output_line_number = $self->{_output_line_number}; my $brace_depth = $line_of_tokens->{_curly_brace_depth}; my $paren_depth = $line_of_tokens->{_paren_depth}; my $square_bracket_depth = $line_of_tokens->{_square_bracket_depth}; my $python_indentation_level = $line_of_tokens->{_python_indentation_level}; my $rlevels = $line_of_tokens->{_rlevels}; my $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; my $rci_levels = $line_of_tokens->{_rci_levels}; my $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; my $structural_indentation_level = $$rlevels[0]; $self->write_column_headings() unless $self->{_wrote_column_headings}; # keep logfile columns aligned for scripts up to 999 lines; # for longer scripts it doesn't really matter my $extra_space = ""; $extra_space .= ( $input_line_number < 10 ) ? " " : ( $input_line_number < 100 ) ? " " : ""; $extra_space .= ( $output_line_number < 10 ) ? " " : ( $output_line_number < 100 ) ? 
" " : ""; # there are 2 possible nesting strings: # the original which looks like this: (0 [1 {2 # the new one, which looks like this: {{[ # the new one is easier to read, and shows the order, but # could be arbitrarily long, so we use it unless it is too long my $nesting_string = "($paren_depth [$square_bracket_depth {$brace_depth"; my $nesting_string_new = $$rnesting_tokens[0]; my $ci_level = $$rci_levels[0]; if ( $ci_level > 9 ) { $ci_level = '*' } my $bk = ( $$rnesting_blocks[0] =~ /1$/ ) ? '1' : '0'; if ( length($nesting_string_new) <= 8 ) { $nesting_string = $nesting_string_new . " " x ( 8 - length($nesting_string_new) ); } if ( $python_indentation_level < 0 ) { $python_indentation_level = 0 } $line_information_string = "L$input_line_number:$output_line_number$extra_space i$python_indentation_level:$structural_indentation_level $ci_level $bk $nesting_string"; } return $line_information_string; } sub logfile_output { my $self = shift; my ( $prompt, $msg ) = @_; return if ( $self->{_block_log_output} ); my $routput_array = $self->{_output_array}; if ( $self->{_at_end_of_file} || !$self->{_use_prefix} ) { push @{$routput_array}, "$msg"; } else { my $line_information_string = $self->make_line_information_string(); $self->{_wrote_line_information_string} = 1; if ($line_information_string) { push @{$routput_array}, "$line_information_string $prompt$msg"; } else { push @{$routput_array}, "$msg"; } } } sub get_saw_brace_error { my $self = shift; return $self->{_saw_brace_error}; } sub increment_brace_error { my $self = shift; $self->{_saw_brace_error}++; } sub brace_warning { my $self = shift; use constant BRACE_WARNING_LIMIT => 10; my $saw_brace_error = $self->{_saw_brace_error}; if ( $saw_brace_error < BRACE_WARNING_LIMIT ) { $self->warning(@_); } $saw_brace_error++; $self->{_saw_brace_error} = $saw_brace_error; if ( $saw_brace_error == BRACE_WARNING_LIMIT ) { $self->warning("No further warnings of this type will be given\n"); } } sub complain { # handle 
non-critical warning messages based on input flag my $self = shift; my $rOpts = $self->{_rOpts}; # these appear in .ERR output only if -w flag is used if ( $rOpts->{'warning-output'} ) { $self->warning(@_); } # otherwise, they go to the .LOG file else { $self->{_complaint_count}++; $self->write_logfile_entry(@_); } } sub warning { # report errors to .ERR file (or stdout) my $self = shift; use constant WARNING_LIMIT => 50; my $rOpts = $self->{_rOpts}; unless ( $rOpts->{'quiet'} ) { my $warning_count = $self->{_warning_count}; unless ($warning_count) { my $warning_file = $self->{_warning_file}; my $fh_warnings; if ( $rOpts->{'standard-error-output'} ) { $fh_warnings = *STDERR; } else { ( $fh_warnings, my $filename ) = Perl::Tidy::streamhandle( $warning_file, 'w' ); $fh_warnings or die("couldn't open $filename $!\n"); warn "## Please see file $filename\n"; } $self->{_fh_warnings} = $fh_warnings; } my $fh_warnings = $self->{_fh_warnings}; if ( $warning_count < WARNING_LIMIT ) { if ( $self->get_use_prefix() > 0 ) { my $input_line_number = Perl::Tidy::Tokenizer::get_input_line_number(); $fh_warnings->print("$input_line_number:\t@_"); $self->write_logfile_entry("WARNING: @_"); } else { $fh_warnings->print(@_); $self->write_logfile_entry(@_); } } $warning_count++; $self->{_warning_count} = $warning_count; if ( $warning_count == WARNING_LIMIT ) { $fh_warnings->print("No further warnings will be given\n"); } } } # programming bug codes: # -1 = no bug # 0 = maybe, not sure. # 1 = definitely sub report_possible_bug { my $self = shift; my $saw_code_bug = $self->{_saw_code_bug}; $self->{_saw_code_bug} = ( $saw_code_bug < 0 ) ? 
0 : $saw_code_bug; } sub report_definite_bug { my $self = shift; $self->{_saw_code_bug} = 1; } sub ask_user_for_bug_report { my $self = shift; my ( $infile_syntax_ok, $formatter ) = @_; my $saw_code_bug = $self->{_saw_code_bug}; if ( ( $saw_code_bug == 0 ) && ( $infile_syntax_ok == 1 ) ) { $self->warning(<{_saw_extrude} ) { $self->warning(<warning(<get_added_semicolon_count(); }; if ( $added_semicolon_count > 0 ) { $self->warning(<{_rOpts}; my $warning_count = $self->{_warning_count}; my $saw_code_bug = $self->{_saw_code_bug}; my $save_logfile = ( $saw_code_bug == 0 && $infile_syntax_ok == 1 ) || $saw_code_bug == 1 || $rOpts->{'logfile'}; my $log_file = $self->{_log_file}; if ($warning_count) { if ($save_logfile) { $self->block_log_output(); # avoid echoing this to the logfile $self->warning( "The logfile $log_file may contain useful information\n"); $self->unblock_log_output(); } if ( $self->{_complaint_count} > 0 ) { $self->warning( "To see $self->{_complaint_count} non-critical warnings rerun with -w\n" ); } if ( $self->{_saw_brace_error} && ( $rOpts->{'logfile-gap'} > 1 || !$save_logfile ) ) { $self->warning("To save a full .LOG file rerun with -g\n"); } } $self->ask_user_for_bug_report( $infile_syntax_ok, $formatter ); if ($save_logfile) { my $log_file = $self->{_log_file}; my ( $fh, $filename ) = Perl::Tidy::streamhandle( $log_file, 'w' ); if ($fh) { my $routput_array = $self->{_output_array}; foreach ( @{$routput_array} ) { $fh->print($_) } eval { $fh->close() }; } } } ##################################################################### # # The Perl::Tidy::DevNull class supplies a dummy print method # ##################################################################### package Perl::Tidy::DevNull; sub new { return bless {}, $_[0] } sub print { return } sub close { return } ##################################################################### # # The Perl::Tidy::HtmlWriter class writes a copy of the input stream in html # 
#####################################################################

package Perl::Tidy::HtmlWriter;

use File::Basename;

# class variables
use vars qw{
  %html_color
  %html_bold
  %html_italic
  %token_short_names
  %short_to_long_names
  $rOpts
  $css_filename
  $css_linkname
  $missing_html_entities
};

# replace unsafe characters with HTML entity representation if HTML::Entities
# is available; $missing_html_entities holds the load error (true if missing)
{ eval "use HTML::Entities"; $missing_html_entities = $@; }

sub new {

    # Create an html writer object.
    #
    # Arguments (positional):
    #   $class              - class name
    #   $input_file         - name of the input file; replaced by "NONAME"
    #                         if it is empty, '-', or a reference
    #   $html_file          - output destination, opened for writing with
    #                         Perl::Tidy::streamhandle
    #   $extension          - output extension; needed for the frames option
    #   $html_toc_extension - used to build the frames toc file name
    #   $html_src_extension - used to build the frames source file name
    #
    # Returns the blessed object, or undef (after a warning) if the output
    # stream cannot be opened.
    #
    # Side effects on the class variable $rOpts: 'pod2html' is cleared if
    # 'html-pre-only' is set or Pod::Html cannot be loaded, and 'frames' is
    # cleared if no extension was given.
    my ( $class, $input_file, $html_file, $extension, $html_toc_extension,
        $html_src_extension )
      = @_;

    # streamhandle returns ( handle, name ); only the handle is needed here
    my ($html_fh) = Perl::Tidy::streamhandle( $html_file, 'w' );
    unless ($html_fh) {
        warn("can't open $html_file: $!\n");
        return undef;
    }
    my $html_file_opened = 1;

    if ( !$input_file || $input_file eq '-' || ref($input_file) ) {
        $input_file = "NONAME";
    }

    # write the table of contents to a string
    my $toc_string;
    my $html_toc_fh = Perl::Tidy::IOScalar->new( \$toc_string, 'w' );

    my $html_pre_fh;
    my @pre_string_stack;
    if ( $rOpts->{'html-pre-only'} ) {

        # pre section goes directly to the output stream
        # NOTE(review): the body of this here-doc is empty in this copy of
        # the file; presumably an opening <pre> tag belongs here -- confirm
        # against a pristine copy of the source.
        $html_pre_fh = $html_fh;
        $html_pre_fh->print( <<"PRE_END");
PRE_END
    }
    else {

        # pre section goes out to a temporary string
        my $pre_string;
        $html_pre_fh = Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
        push @pre_string_stack, \$pre_string;
    }

    # pod text gets diverted if the 'pod2html' option is used
    my $html_pod_fh;
    my $pod_string;
    if ( $rOpts->{'pod2html'} ) {
        if ( $rOpts->{'html-pre-only'} ) {

            # the two options cannot be combined; html-pre-only wins
            undef $rOpts->{'pod2html'};
        }
        else {

            # Pod::Html is optional, so it is loaded at run time
            eval "use Pod::Html";
            if ($@) {
                warn
"unable to find Pod::Html; cannot use pod2html\n-npod disables this message\n";
                undef $rOpts->{'pod2html'};
            }
            else {
                $html_pod_fh = Perl::Tidy::IOScalar->new( \$pod_string, 'w' );
            }
        }
    }

    # file names for the two panels of a frame
    my $toc_filename;
    my $src_filename;
    if ( $rOpts->{'frames'} ) {
        if ($extension) {
            $toc_filename = $input_file . $html_toc_extension . $extension;
            $src_filename = $input_file . $html_src_extension . $extension;
        }
        else {
            warn
"cannot use frames without a specified output extension; ignoring -frm\n";
            undef $rOpts->{'frames'};
        }
    }

    # ----------------------------------------------------------
    # Output is now directed as follows:
    # html_toc_fh <-- table of contents items
    # html_pre_fh <-- the <pre> section of formatted code, except:
    # html_pod_fh <-- pod goes here with the pod2html option
    # ----------------------------------------------------------

    # the title defaults to the base name of the input file
    my $title = $rOpts->{'title'};
    unless ($title) {
        ($title) = fileparse($input_file);
    }

    # these scalars are shared with the object through references
    my $toc_item_count = 0;
    my $in_toc_package = "";
    my $last_level     = 0;

    return bless {
        _input_file        => $input_file,          # name of input file
        _title             => $title,               # title, unescaped
        _html_file         => $html_file,           # name of .html output file
        _toc_filename      => $toc_filename,        # for frames option
        _src_filename      => $src_filename,        # for frames option
        _html_file_opened  => $html_file_opened,    # a flag
        _html_fh           => $html_fh,             # the output stream
        _html_pre_fh       => $html_pre_fh,         # pre section goes here
        _rpre_string_stack => \@pre_string_stack,   # stack of pre sections
        _html_pod_fh       => $html_pod_fh,         # pod goes here if pod2html
        _rpod_string       => \$pod_string,         # string holding pod
        _pod_cut_count     => 0,                    # how many =cut's?
        _html_toc_fh       => $html_toc_fh,         # fh for table of contents
        _rtoc_string       => \$toc_string,         # string holding toc
        _rtoc_item_count   => \$toc_item_count,     # how many toc items
        _rin_toc_package   => \$in_toc_package,     # package name
        _rtoc_name_count   => {},                   # hash to track unique names
        _rpackage_stack    => [],                   # stack to check for package
                                                    # name changes
        _rlast_level       => \$last_level,         # brace indentation level
    }, $class;
}

sub add_toc_item {

    # Add an item to the html table of contents.
    # This is called even if no table of contents is written,
    # because we still want to put the anchors in the <pre> text.
    # We are given an anchor name and its type; types are:
    #      'package', 'sub', '__END__', '__DATA__', 'EOF'
    # There must be an 'EOF' call at the end to wrap things up.
    my $self = shift;
    my ( $name, $type ) = @_;
    my $html_toc_fh     = $self->{_html_toc_fh};
    my $html_pre_fh     = $self->{_html_pre_fh};
    my $rtoc_name_count = $self->{_rtoc_name_count};
    my $rtoc_item_count = $self->{_rtoc_item_count};
    my $rlast_level     = $self->{_rlast_level};
    my $rin_toc_package = $self->{_rin_toc_package};
    my $rpackage_stack  = $self->{_rpackage_stack};

    # packages contain sublists of subs, so to avoid errors all package
    # items are written and finished with the following routines
    my $end_package_list = sub {
        if ($$rin_toc_package) {
            $html_toc_fh->print("\n\n");
            $$rin_toc_package = "";
        }
    };

    my $start_package_list = sub {
        my ( $unique_name, $package ) = @_;
        if ($$rin_toc_package) { $end_package_list->() }
        $html_toc_fh->print(<package $package
    EOM $$rin_toc_package = $package; }; # start the table of contents on the first item unless ($$rtoc_item_count) { # but just quit if we hit EOF without any other entries # in this case, there will be no toc return if ( $type eq 'EOF' ); $html_toc_fh->print( <<"TOC_END");
      TOC_END } $$rtoc_item_count++; # make a unique anchor name for this location: # - packages get a 'package-' prefix # - subs use their names my $unique_name = $name; if ( $type eq 'package' ) { $unique_name = "package-$name" } # append '-1', '-2', etc if necessary to make unique; this will # be unique because subs and packages cannot have a '-' if ( my $count = $rtoc_name_count->{ lc $unique_name }++ ) { $unique_name .= "-$count"; } # - all names get terminal '-' if pod2html is used, to avoid # conflicts with anchor names created by pod2html if ( $rOpts->{'pod2html'} ) { $unique_name .= '-' } # start/stop lists of subs if ( $type eq 'sub' ) { my $package = $rpackage_stack->[$$rlast_level]; unless ($package) { $package = 'main' } # if we're already in a package/sub list, be sure its the right # package or else close it if ( $$rin_toc_package && $$rin_toc_package ne $package ) { $end_package_list->(); } # start a package/sub list if necessary unless ($$rin_toc_package) { $start_package_list->( $unique_name, $package ); } } # now write an entry in the toc for this item if ( $type eq 'package' ) { $start_package_list->( $unique_name, $name ); } elsif ( $type eq 'sub' ) { $html_toc_fh->print("
    • $name
    • \n"); } else { $end_package_list->(); $html_toc_fh->print("
    • $name
    • \n"); } # write the anchor in the
       section
          $html_pre_fh->print("");
      
          # end the table of contents, if any, on the end of file
          if ( $type eq 'EOF' ) {
              $html_toc_fh->print( <<"TOC_END");
      
    TOC_END } } BEGIN { # This is the official list of tokens which may be identified by the # user. Long names are used as getopt keys. Short names are # convenient short abbreviations for specifying input. Short names # somewhat resemble token type characters, but are often different # because they may only be alphanumeric, to allow command line # input. Also, note that because of case insensitivity of html, # this table must be in a single case only (I've chosen to use all # lower case). # When adding NEW_TOKENS: update this hash table # short names => long names %short_to_long_names = ( 'n' => 'numeric', 'p' => 'paren', 'q' => 'quote', 's' => 'structure', 'c' => 'comment', 'v' => 'v-string', 'cm' => 'comma', 'w' => 'bareword', 'co' => 'colon', 'pu' => 'punctuation', 'i' => 'identifier', 'j' => 'label', 'h' => 'here-doc-target', 'hh' => 'here-doc-text', 'k' => 'keyword', 'sc' => 'semicolon', 'm' => 'subroutine', 'pd' => 'pod-text', ); # Now we have to map actual token types into one of the above short # names; any token types not mapped will get 'punctuation' # properties. # The values of this hash table correspond to the keys of the # previous hash table. # The keys of this hash table are token types and can be seen # by running with --dump-token-types (-dtt). 
# When adding NEW_TOKENS: update this hash table # $type => $short_name %token_short_names = ( '#' => 'c', 'n' => 'n', 'v' => 'v', 'k' => 'k', 'F' => 'k', 'Q' => 'q', 'q' => 'q', 'J' => 'j', 'j' => 'j', 'h' => 'h', 'H' => 'hh', 'w' => 'w', ',' => 'cm', '=>' => 'cm', ';' => 'sc', ':' => 'co', 'f' => 'sc', '(' => 'p', ')' => 'p', 'M' => 'm', 'P' => 'pd', 'A' => 'co', ); # These token types will all be called identifiers for now # FIXME: need to separate user defined modules as separate type my @identifier = qw" i t U C Y Z G :: "; @token_short_names{@identifier} = ('i') x scalar(@identifier); # These token types will be called 'structure' my @structure = qw" { } "; @token_short_names{@structure} = ('s') x scalar(@structure); # OLD NOTES: save for reference # Any of these could be added later if it would be useful. # For now, they will by default become punctuation # my @list = qw" L R [ ] "; # @token_long_names{@list} = ('non-structure') x scalar(@list); # # my @list = qw" # / /= * *= ** **= + += - -= % %= = ++ -- << <<= >> >>= pp p m mm # "; # @token_long_names{@list} = ('math') x scalar(@list); # # my @list = qw" & &= ~ ~= ^ ^= | |= "; # @token_long_names{@list} = ('bit') x scalar(@list); # # my @list = qw" == != < > <= <=> "; # @token_long_names{@list} = ('numerical-comparison') x scalar(@list); # # my @list = qw" && || ! &&= ||= //= "; # @token_long_names{@list} = ('logical') x scalar(@list); # # my @list = qw" . .= =~ !~ x x= "; # @token_long_names{@list} = ('string-operators') x scalar(@list); # # # Incomplete.. # my @list = qw" .. -> <> ... \ ? 
"; # @token_long_names{@list} = ('misc-operators') x scalar(@list); } sub make_getopt_long_names { my $class = shift; my ($rgetopt_names) = @_; while ( my ( $short_name, $name ) = each %short_to_long_names ) { push @$rgetopt_names, "html-color-$name=s"; push @$rgetopt_names, "html-italic-$name!"; push @$rgetopt_names, "html-bold-$name!"; } push @$rgetopt_names, "html-color-background=s"; push @$rgetopt_names, "html-linked-style-sheet=s"; push @$rgetopt_names, "nohtml-style-sheets"; push @$rgetopt_names, "html-pre-only"; push @$rgetopt_names, "html-line-numbers"; push @$rgetopt_names, "html-entities!"; push @$rgetopt_names, "stylesheet"; push @$rgetopt_names, "html-table-of-contents!"; push @$rgetopt_names, "pod2html!"; push @$rgetopt_names, "frames!"; push @$rgetopt_names, "html-toc-extension=s"; push @$rgetopt_names, "html-src-extension=s"; # Pod::Html parameters: push @$rgetopt_names, "backlink=s"; push @$rgetopt_names, "cachedir=s"; push @$rgetopt_names, "htmlroot=s"; push @$rgetopt_names, "libpods=s"; push @$rgetopt_names, "podpath=s"; push @$rgetopt_names, "podroot=s"; push @$rgetopt_names, "title=s"; # Pod::Html parameters with leading 'pod' which will be removed # before the call to Pod::Html push @$rgetopt_names, "podquiet!"; push @$rgetopt_names, "podverbose!"; push @$rgetopt_names, "podrecurse!"; push @$rgetopt_names, "podflush"; push @$rgetopt_names, "podheader!"; push @$rgetopt_names, "podindex!"; } sub make_abbreviated_names { # We're appending things like this to the expansion list: # 'hcc' => [qw(html-color-comment)], # 'hck' => [qw(html-color-keyword)], # etc my $class = shift; my ($rexpansion) = @_; # abbreviations for color/bold/italic properties while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { ${$rexpansion}{"hc$short_name"} = ["html-color-$long_name"]; ${$rexpansion}{"hb$short_name"} = ["html-bold-$long_name"]; ${$rexpansion}{"hi$short_name"} = ["html-italic-$long_name"]; ${$rexpansion}{"nhb$short_name"} = 
["nohtml-bold-$long_name"]; ${$rexpansion}{"nhi$short_name"} = ["nohtml-italic-$long_name"]; } # abbreviations for all other html options ${$rexpansion}{"hcbg"} = ["html-color-background"]; ${$rexpansion}{"pre"} = ["html-pre-only"]; ${$rexpansion}{"toc"} = ["html-table-of-contents"]; ${$rexpansion}{"ntoc"} = ["nohtml-table-of-contents"]; ${$rexpansion}{"nnn"} = ["html-line-numbers"]; ${$rexpansion}{"hent"} = ["html-entities"]; ${$rexpansion}{"nhent"} = ["nohtml-entities"]; ${$rexpansion}{"css"} = ["html-linked-style-sheet"]; ${$rexpansion}{"nss"} = ["nohtml-style-sheets"]; ${$rexpansion}{"ss"} = ["stylesheet"]; ${$rexpansion}{"pod"} = ["pod2html"]; ${$rexpansion}{"npod"} = ["nopod2html"]; ${$rexpansion}{"frm"} = ["frames"]; ${$rexpansion}{"nfrm"} = ["noframes"]; ${$rexpansion}{"text"} = ["html-toc-extension"]; ${$rexpansion}{"sext"} = ["html-src-extension"]; } sub check_options { # This will be called once after options have been parsed my $class = shift; $rOpts = shift; # X11 color names for default settings that seemed to look ok # (these color names are only used for programming clarity; the hex # numbers are actually written) use constant ForestGreen => "#228B22"; use constant SaddleBrown => "#8B4513"; use constant magenta4 => "#8B008B"; use constant IndianRed3 => "#CD5555"; use constant DeepSkyBlue4 => "#00688B"; use constant MediumOrchid3 => "#B452CD"; use constant black => "#000000"; use constant white => "#FFFFFF"; use constant red => "#FF0000"; # set default color, bold, italic properties # anything not listed here will be given the default (punctuation) color -- # these types currently not listed and get default: ws pu s sc cm co p # When adding NEW_TOKENS: add an entry here if you don't want defaults # set_default_properties( $short_name, default_color, bold?, italic? 
); set_default_properties( 'c', ForestGreen, 0, 0 ); set_default_properties( 'pd', ForestGreen, 0, 1 ); set_default_properties( 'k', magenta4, 1, 0 ); # was SaddleBrown set_default_properties( 'q', IndianRed3, 0, 0 ); set_default_properties( 'hh', IndianRed3, 0, 1 ); set_default_properties( 'h', IndianRed3, 1, 0 ); set_default_properties( 'i', DeepSkyBlue4, 0, 0 ); set_default_properties( 'w', black, 0, 0 ); set_default_properties( 'n', MediumOrchid3, 0, 0 ); set_default_properties( 'v', MediumOrchid3, 0, 0 ); set_default_properties( 'j', IndianRed3, 1, 0 ); set_default_properties( 'm', red, 1, 0 ); set_default_color( 'html-color-background', white ); set_default_color( 'html-color-punctuation', black ); # setup property lookup tables for tokens based on their short names # every token type has a short name, and will use these tables # to do the html markup while ( my ( $short_name, $long_name ) = each %short_to_long_names ) { $html_color{$short_name} = $rOpts->{"html-color-$long_name"}; $html_bold{$short_name} = $rOpts->{"html-bold-$long_name"}; $html_italic{$short_name} = $rOpts->{"html-italic-$long_name"}; } # write style sheet to STDOUT and die if requested if ( defined( $rOpts->{'stylesheet'} ) ) { write_style_sheet_file('-'); exit 1; } # make sure user gives a file name after -css if ( defined( $rOpts->{'html-linked-style-sheet'} ) ) { $css_linkname = $rOpts->{'html-linked-style-sheet'}; if ( $css_linkname =~ /^-/ ) { die "You must specify a valid filename after -css\n"; } } # check for conflict if ( $css_linkname && $rOpts->{'nohtml-style-sheets'} ) { $rOpts->{'nohtml-style-sheets'} = 0; warning("You can't specify both -css and -nss; -nss ignored\n"); } # write a style sheet file if necessary if ($css_linkname) { # if the selected filename exists, don't write, because user may # have done some work by hand to create it; use backup name instead # Also, this will avoid a potential disaster in which the user # forgets to specify the style sheet, like this: # 
perltidy -html -css myfile1.pl myfile2.pl # This would cause myfile1.pl to parsed as the style sheet by GetOpts my $css_filename = $css_linkname; unless ( -e $css_filename ) { write_style_sheet_file($css_filename); } } $missing_html_entities = 1 unless $rOpts->{'html-entities'}; } sub write_style_sheet_file { my $css_filename = shift; my $fh; unless ( $fh = IO::File->new("> $css_filename") ) { die "can't open $css_filename: $!\n"; } write_style_sheet_data($fh); eval { $fh->close }; } sub write_style_sheet_data { # write the style sheet data to an open file handle my $fh = shift; my $bg_color = $rOpts->{'html-color-background'}; my $text_color = $rOpts->{'html-color-punctuation'}; # pre-bgcolor is new, and may not be defined my $pre_bg_color = $rOpts->{'html-pre-color-background'}; $pre_bg_color = $bg_color unless $pre_bg_color; $fh->print(<<"EOM"); /* default style sheet generated by perltidy */ body {background: $bg_color; color: $text_color} pre { color: $text_color; background: $pre_bg_color; font-family: courier; } EOM foreach my $short_name ( sort keys %short_to_long_names ) { my $long_name = $short_to_long_names{$short_name}; my $abbrev = '.' . 
$short_name; if ( length($short_name) == 1 ) { $abbrev .= ' ' } # for alignment my $color = $html_color{$short_name}; if ( !defined($color) ) { $color = $text_color } $fh->print("$abbrev \{ color: $color;"); if ( $html_bold{$short_name} ) { $fh->print(" font-weight:bold;"); } if ( $html_italic{$short_name} ) { $fh->print(" font-style:italic;"); } $fh->print("} /* $long_name */\n"); } } sub set_default_color { # make sure that options hash $rOpts->{$key} contains a valid color my ( $key, $color ) = @_; if ( $rOpts->{$key} ) { $color = $rOpts->{$key} } $rOpts->{$key} = check_RGB($color); } sub check_RGB { # if color is a 6 digit hex RGB value, prepend a #, otherwise # assume that it is a valid ascii color name my ($color) = @_; if ( $color =~ /^[0-9a-fA-F]{6,6}$/ ) { $color = "#$color" } return $color; } sub set_default_properties { my ( $short_name, $color, $bold, $italic ) = @_; set_default_color( "html-color-$short_to_long_names{$short_name}", $color ); my $key; $key = "html-bold-$short_to_long_names{$short_name}"; $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $bold; $key = "html-italic-$short_to_long_names{$short_name}"; $rOpts->{$key} = ( defined $rOpts->{$key} ) ? $rOpts->{$key} : $italic; } sub pod_to_html { # Use Pod::Html to process the pod and make the page # then merge the perltidy code sections into it. 
# return 1 if success, 0 otherwise my $self = shift; my ( $pod_string, $css_string, $toc_string, $rpre_string_stack ) = @_; my $input_file = $self->{_input_file}; my $title = $self->{_title}; my $success_flag = 0; # don't try to use pod2html if no pod unless ($pod_string) { return $success_flag; } # Pod::Html requires a real temporary filename # If we are making a frame, we have a name available # Otherwise, we have to fine one my $tmpfile; if ( $rOpts->{'frames'} ) { $tmpfile = $self->{_toc_filename}; } else { $tmpfile = Perl::Tidy::make_temporary_filename(); } my $fh_tmp = IO::File->new( $tmpfile, 'w' ); unless ($fh_tmp) { warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; return $success_flag; } #------------------------------------------------------------------ # Warning: a temporary file is open; we have to clean up if # things go bad. From here on all returns should be by going to # RETURN so that the temporary file gets unlinked. #------------------------------------------------------------------ # write the pod text to the temporary file $fh_tmp->print($pod_string); $fh_tmp->close(); # Hand off the pod to pod2html. # Note that we can use the same temporary filename for input and output # because of the way pod2html works. { my @args; push @args, "--infile=$tmpfile", "--outfile=$tmpfile", "--title=$title"; my $kw; # Flags with string args: # "backlink=s", "cachedir=s", "htmlroot=s", "libpods=s", # "podpath=s", "podroot=s" # Note: -css=s is handled by perltidy itself foreach $kw (qw(backlink cachedir htmlroot libpods podpath podroot)) { if ( $rOpts->{$kw} ) { push @args, "--$kw=$rOpts->{$kw}" } } # Toggle switches; these have extra leading 'pod' # "header!", "index!", "recurse!", "quiet!", "verbose!" 
foreach $kw (qw(podheader podindex podrecurse podquiet podverbose)) { my $kwd = $kw; # allows us to strip 'pod' if ( $rOpts->{$kw} ) { $kwd =~ s/^pod//; push @args, "--$kwd" } elsif ( defined( $rOpts->{$kw} ) ) { $kwd =~ s/^pod//; push @args, "--no$kwd"; } } # "flush", $kw = 'podflush'; if ( $rOpts->{$kw} ) { $kw =~ s/^pod//; push @args, "--$kw" } # Must clean up if pod2html dies (it can); # Be careful not to overwrite callers __DIE__ routine local $SIG{__DIE__} = sub { print $_[0]; unlink $tmpfile if -e $tmpfile; exit 1; }; pod2html(@args); } $fh_tmp = IO::File->new( $tmpfile, 'r' ); unless ($fh_tmp) { # this error shouldn't happen ... we just used this filename warn "unable to open temporary file $tmpfile; cannot use pod2html\n"; goto RETURN; } my $html_fh = $self->{_html_fh}; my @toc; my $in_toc; my $no_print; # This routine will write the html selectively and store the toc my $html_print = sub { foreach (@_) { $html_fh->print($_) unless ($no_print); if ($in_toc) { push @toc, $_ } } }; # loop over lines of html output from pod2html and merge in # the necessary perltidy html sections my ( $saw_body, $saw_index, $saw_body_end ); while ( my $line = $fh_tmp->getline() ) { if ( $line =~ /^\s*\s*$/i ) { my $date = localtime; $html_print->("\n"); $html_print->($line); } # Copy the perltidy css, if any, after tag elsif ( $line =~ /^\s*\s*$/i ) { $saw_body = 1; $html_print->($css_string) if $css_string; $html_print->($line); # add a top anchor and heading $html_print->("\n"); $title = escape_html($title); $html_print->("

    $title

    \n"); } elsif ( $line =~ /^\s*\s*$/i ) { $in_toc = 1; # when frames are used, an extra table of contents in the # contents panel is confusing, so don't print it $no_print = $rOpts->{'frames'} || !$rOpts->{'html-table-of-contents'}; $html_print->("

    Doc Index:

    \n") if $rOpts->{'frames'}; $html_print->($line); } # Copy the perltidy toc, if any, after the Pod::Html toc elsif ( $line =~ /^\s*\s*$/i ) { $saw_index = 1; $html_print->($line); if ($toc_string) { $html_print->("
    \n") if $rOpts->{'frames'}; $html_print->("

    Code Index:

    \n"); my @toc = map { $_ .= "\n" } split /\n/, $toc_string; $html_print->(@toc); } $in_toc = 0; $no_print = 0; } # Copy one perltidy section after each marker elsif ( $line =~ /^(.*)(.*)$/ ) { $line = $2; $html_print->($1) if $1; # Intermingle code and pod sections if we saw multiple =cut's. if ( $self->{_pod_cut_count} > 1 ) { my $rpre_string = shift(@$rpre_string_stack); if ($$rpre_string) { $html_print->('
    ');
                        $html_print->($$rpre_string);
                        $html_print->('
    '); } else { # shouldn't happen: we stored a string before writing # each marker. warn "Problem merging html stream with pod2html; order may be wrong\n"; } $html_print->($line); } # If didn't see multiple =cut lines, we'll put the pod out first # and then the code, because it's less confusing. else { # since we are not intermixing code and pod, we don't need # or want any
    lines which separated pod and code $html_print->($line) unless ( $line =~ /^\s*
    \s*$/i ); } } # Copy any remaining code section before the tag elsif ( $line =~ /^\s*<\/body>\s*$/i ) { $saw_body_end = 1; if (@$rpre_string_stack) { unless ( $self->{_pod_cut_count} > 1 ) { $html_print->('
    '); } while ( my $rpre_string = shift(@$rpre_string_stack) ) { $html_print->('
    ');
                        $html_print->($$rpre_string);
                        $html_print->('
    '); } } $html_print->($line); } else { $html_print->($line); } } $success_flag = 1; unless ($saw_body) { warn "Did not see in pod2html output\n"; $success_flag = 0; } unless ($saw_body_end) { warn "Did not see in pod2html output\n"; $success_flag = 0; } unless ($saw_index) { warn "Did not find INDEX END in pod2html output\n"; $success_flag = 0; } RETURN: eval { $html_fh->close() }; # note that we have to unlink tmpfile before making frames # because the tmpfile may be one of the names used for frames unlink $tmpfile if -e $tmpfile; if ( $success_flag && $rOpts->{'frames'} ) { $self->make_frame( \@toc ); } return $success_flag; } sub make_frame { # Make a frame with table of contents in the left panel # and the text in the right panel. # On entry: # $html_filename contains the no-frames html output # $rtoc is a reference to an array with the table of contents my $self = shift; my ($rtoc) = @_; my $input_file = $self->{_input_file}; my $html_filename = $self->{_html_file}; my $toc_filename = $self->{_toc_filename}; my $src_filename = $self->{_src_filename}; my $title = $self->{_title}; $title = escape_html($title); # FUTURE input parameter: my $top_basename = ""; # We need to produce 3 html files: # 1. - the table of contents # 2. - the contents (source code) itself # 3. - the frame which contains them # get basenames for relative links my ( $toc_basename, $toc_path ) = fileparse($toc_filename); my ( $src_basename, $src_path ) = fileparse($src_filename); # 1. Make the table of contents panel, with appropriate changes # to the anchor names my $src_frame_name = 'SRC'; my $first_anchor = write_toc_html( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ); # 2. The current .html filename is renamed to be the contents panel rename( $html_filename, $src_filename ) or die "Cannot rename $html_filename to $src_filename:$!\n"; # 3. 
Then use the original html filename for the frame write_frame_html( $title, $html_filename, $top_basename, $toc_basename, $src_basename, $src_frame_name ); } sub write_toc_html { # write a separate html table of contents file for frames my ( $title, $toc_filename, $src_basename, $rtoc, $src_frame_name ) = @_; my $fh = IO::File->new( $toc_filename, 'w' ) or die "Cannot open $toc_filename:$!\n"; $fh->print(< $title

    $title

    EOM my $first_anchor = change_anchor_names( $rtoc, $src_basename, "$src_frame_name" ); $fh->print( join "", @$rtoc ); $fh->print(< EOM } sub write_frame_html { # write an html file to be the table of contents frame my ( $title, $frame_filename, $top_basename, $toc_basename, $src_basename, $src_frame_name ) = @_; my $fh = IO::File->new( $frame_filename, 'w' ) or die "Cannot open $toc_basename:$!\n"; $fh->print(< $title EOM # two left panels, one right, if master index file if ($top_basename) { $fh->print(< EOM } # one left panels, one right, if no master index file else { $fh->print(< EOM } $fh->print(< <body> <p>If you see this message, you are using a non-frame-capable web client.</p> <p>This document contains:</p> <ul> <li><a href="$toc_basename">A table of contents</a></li> <li><a href="$src_basename">The source code</a></li> </ul> </body> EOM } sub change_anchor_names { # add a filename and target to anchors # also return the first anchor my ( $rlines, $filename, $target ) = @_; my $first_anchor; foreach my $line (@$rlines) { # We're looking for lines like this: #
  • SYNOPSIS
  • # ---- - -------- ----------------- # $1 $4 $5 if ( $line =~ /^(.*)]*>(.*)$/i ) { my $pre = $1; my $name = $4; my $post = $5; my $href = "$filename#$name"; $line = "$pre$post\n"; unless ($first_anchor) { $first_anchor = $href } } } return $first_anchor; } sub close_html_file { my $self = shift; return unless $self->{_html_file_opened}; my $html_fh = $self->{_html_fh}; my $rtoc_string = $self->{_rtoc_string}; # There are 3 basic paths to html output... # --------------------------------- # Path 1: finish up if in -pre mode # --------------------------------- if ( $rOpts->{'html-pre-only'} ) { $html_fh->print( <<"PRE_END");
PRE_END eval { $html_fh->close() }; return; } # Finish the index $self->add_toc_item( 'EOF', 'EOF' ); my $rpre_string_stack = $self->{_rpre_string_stack}; # Patch to darken the
 background color in case of pod2html and
    # interleaved code/documentation.  Otherwise, the distinction
    # between code and documentation is blurred.
    if (   $rOpts->{pod2html}
        && $self->{_pod_cut_count} >= 1
        && $rOpts->{'html-color-background'} eq '#FFFFFF' )
    {
        $rOpts->{'html-pre-color-background'} = '#F0F0F0';
    }

    # put the css or its link into a string, if used
    my $css_string;
    my $fh_css = Perl::Tidy::IOScalar->new( \$css_string, 'w' );

    # use css linked to another file
    if ( $rOpts->{'html-linked-style-sheet'} ) {
        $fh_css->print(
            qq()
        );
    }

    # use css embedded in this file
    elsif ( !$rOpts->{'nohtml-style-sheets'} ) {
        $fh_css->print( <<'ENDCSS');

ENDCSS
    }

    # -----------------------------------------------------------
    # path 2: use pod2html if requested
    #         If we fail for some reason, continue on to path 3
    # -----------------------------------------------------------
    if ( $rOpts->{'pod2html'} ) {
        my $rpod_string = $self->{_rpod_string};
        $self->pod_to_html( $$rpod_string, $css_string, $$rtoc_string,
            $rpre_string_stack )
          && return;
    }

    # --------------------------------------------------
    # path 3: write code in html, with pod only in italics
    # --------------------------------------------------
    my $input_file = $self->{_input_file};
    my $title      = escape_html($input_file);
    my $date       = localtime;
    $html_fh->print( <<"HTML_START");




$title
HTML_START

    # output the css, if used
    if ($css_string) {
        $html_fh->print($css_string);
        $html_fh->print( <<"ENDCSS");


ENDCSS
    }
    else {

        $html_fh->print( <<"HTML_START");

{'html-color-background'}\" text=\"$rOpts->{'html-color-punctuation'}\">
HTML_START
    }

    $html_fh->print("\n");
    $html_fh->print( <<"EOM");

$title

EOM # copy the table of contents if ( $$rtoc_string && !$rOpts->{'frames'} && $rOpts->{'html-table-of-contents'} ) { $html_fh->print($$rtoc_string); } # copy the pre section(s) my $fname_comment = $input_file; $fname_comment =~ s/--+/-/g; # protect HTML comment tags $html_fh->print( <<"END_PRE");
END_PRE

    foreach my $rpre_string (@$rpre_string_stack) {
        $html_fh->print($$rpre_string);
    }

    # and finish the html page
    $html_fh->print( <<"HTML_END");
HTML_END
    eval { $html_fh->close() };    # could be object without close method

    if ( $rOpts->{'frames'} ) {
        my @toc = map { $_ .= "\n" } split /\n/, $$rtoc_string;
        $self->make_frame( \@toc );
    }
}

sub markup_tokens {

    # Convert the tokens of one line of code into marked-up html strings.
    #
    # given:
    #   $rtokens     - ref to array of token text
    #   $rtoken_type - ref to array of token types (parallel to $rtokens)
    #   $rlevels     - ref to array of nesting levels (parallel to $rtokens)
    # returns: ref to an array of marked-up token strings
    #
    # Side effects: updates the package stack and the last-level value held
    # in $self, and adds table-of-contents entries for subs and packages.
    my $self = shift;
    my ( $rtokens, $rtoken_type, $rlevels ) = @_;
    my ( @colored_tokens, $j, $string, $type, $token, $level );
    my $rlast_level    = $self->{_rlast_level};
    my $rpackage_stack = $self->{_rpackage_stack};

    for ( $j = 0 ; $j < @$rtoken_type ; $j++ ) {
        $type  = $$rtoken_type[$j];
        $token = $$rtokens[$j];
        $level = $$rlevels[$j];
        $level = 0 if ( $level < 0 );

        #-------------------------------------------------------
        # Update the package stack. The package stack is needed to keep
        # the toc correct because some packages may be declared within
        # blocks and go out of scope when we leave the block.
        #-------------------------------------------------------
        if ( $level > $$rlast_level ) {
            unless ( $rpackage_stack->[ $level - 1 ] ) {
                $rpackage_stack->[ $level - 1 ] = 'main';
            }
            $rpackage_stack->[$level] = $rpackage_stack->[ $level - 1 ];
        }
        elsif ( $level < $$rlast_level ) {
            my $package = $rpackage_stack->[$level];
            unless ($package) { $package = 'main' }

            # if we change packages due to a nesting change, we
            # have to make an entry in the toc
            if ( $package ne $rpackage_stack->[ $level + 1 ] ) {
                $self->add_toc_item( $package, 'package' );
            }
        }
        $$rlast_level = $level;

        #-------------------------------------------------------
        # Intercept a sub name here; split it
        # into keyword 'sub' and sub name; and add an
        # entry in the toc
        #-------------------------------------------------------
        if ( $type eq 'i' && $token =~ /^(sub\s+)(\w.*)$/ ) {
            $token = $self->markup_html_element( $1, 'k' );
            push @colored_tokens, $token;
            $token = $2;
            $type  = 'M';

            # but don't include sub declarations in the toc;
            # these will have leading token types 'i;'
            my $signature = join "", @$rtoken_type;
            unless ( $signature =~ /^i;/ ) {
                my $subname = $token;
                $subname =~ s/[\s\(].*$//; # remove any attributes and prototype
                $self->add_toc_item( $subname, 'sub' );
            }
        }

        #-------------------------------------------------------
        # Intercept a package name here; split it
        # into keyword 'package' and name; add to the toc,
        # and update the package stack
        #-------------------------------------------------------
        if ( $type eq 'i' && $token =~ /^(package\s+)(\w.*)$/ ) {
            $token = $self->markup_html_element( $1, 'k' );
            push @colored_tokens, $token;
            $token = $2;
            $type  = 'i';
            $self->add_toc_item( "$token", 'package' );
            $rpackage_stack->[$level] = $token;
        }

        $token = $self->markup_html_element( $token, $type );
        push @colored_tokens, $token;
    }
    return ( \@colored_tokens );
}

sub markup_html_element {

    # Wrap a single token in html markup according to its type.
    #
    # given:   $token - the token text, $type - its token type code
    # returns: the escaped token, wrapped in markup unless it is blank
    #
    # NOTE(review): the markup strings below were empty in the copy under
    # review (the tags had been stripped, leaving no-op wrappers); they have
    # been reconstructed and should be confirmed against a pristine copy.
    my $self = shift;
    my ( $token, $type ) = @_;

    return $token if ( $type eq 'b' );         # skip a blank token
    return $token if ( $token =~ /^\s*$/ );    # skip a blank line
    $token = escape_html($token);

    # get the short abbreviation for this token type
    my $short_name = $token_short_names{$type};
    if ( !defined($short_name) ) {
        $short_name = "pu";                    # punctuation is default
    }

    # handle style sheets..
    if ( !$rOpts->{'nohtml-style-sheets'} ) {
        if ( $short_name ne 'pu' ) {
            $token = qq(<span class="$short_name">) . $token . "</span>";
        }
    }

    # handle no style sheets..
    else {
        my $color = $html_color{$short_name};

        if ( $color && ( $color ne $rOpts->{'html-color-punctuation'} ) ) {
            $token = qq(<font color="$color">) . $token . "</font>";
        }
        if ( $html_italic{$short_name} ) { $token = "<i>$token</i>" }
        if ( $html_bold{$short_name} )   { $token = "<b>$token</b>" }
    }
    return $token;
}

sub escape_html {

    # Return a copy of a token with html-special characters escaped.
    # Uses HTML::Entities when available, otherwise a minimal built-in
    # replacement of & < > and ".
    #
    # NOTE(review): the entity names in the fallback branch were missing in
    # the copy under review (leaving invalid substitutions); reconstructed.
    my $token = shift;
    if ($missing_html_entities) {

        # '&' must be escaped first so that the entities added below are
        # not escaped a second time
        $token =~ s/\&/&amp;/g;
        $token =~ s/\</&lt;/g;
        $token =~ s/\>/&gt;/g;
        $token =~ s/\"/&quot;/g;
    }
    else {
        HTML::Entities::encode_entities($token);
    }
    return $token;
}

sub finish_formatting {

    # called after last line
    my $self = shift;
    $self->close_html_file();
    return;
}

sub write_line {

    # Write one input line to the html output.
    #
    # given: $line_of_tokens - hash ref with keys _line_type, _line_text,
    #        _line_number and, for CODE lines, _rtoken_type, _rtokens and
    #        _rlevels.
    # Does nothing unless the html file has been opened.  When the pod2html
    # option is set, pod lines are diverted to the pod stream instead.
    my $self = shift;
    return unless $self->{_html_file_opened};
    my $html_pre_fh      = $self->{_html_pre_fh};
    my ($line_of_tokens) = @_;
    my $line_type        = $line_of_tokens->{_line_type};
    my $input_line       = $line_of_tokens->{_line_text};
    my $line_number      = $line_of_tokens->{_line_number};
    chomp $input_line;

    # markup line of code..
    my $html_line;
    if ( $line_type eq 'CODE' ) {
        my $rtoken_type = $line_of_tokens->{_rtoken_type};
        my $rtokens     = $line_of_tokens->{_rtokens};
        my $rlevels     = $line_of_tokens->{_rlevels};

        # keep the leading whitespace of the input line
        if ( $input_line =~ /(^\s*)/ ) {
            $html_line = $1;
        }
        else {
            $html_line = "";
        }
        my ($rcolored_tokens) =
          $self->markup_tokens( $rtokens, $rtoken_type, $rlevels );
        $html_line .= join '', @$rcolored_tokens;
    }

    # markup line of non-code..
    else {
        my $line_character;
        if    ( $line_type eq 'HERE' )       { $line_character = 'H' }
        elsif ( $line_type eq 'HERE_END' )   { $line_character = 'h' }
        elsif ( $line_type eq 'FORMAT' )     { $line_character = 'H' }
        elsif ( $line_type eq 'FORMAT_END' ) { $line_character = 'h' }
        elsif ( $line_type eq 'SYSTEM' )     { $line_character = 'c' }
        elsif ( $line_type eq 'END_START' ) {
            $line_character = 'k';
            $self->add_toc_item( '__END__', '__END__' );
        }
        elsif ( $line_type eq 'DATA_START' ) {
            $line_character = 'k';
            $self->add_toc_item( '__DATA__', '__DATA__' );
        }
        elsif ( $line_type =~ /^POD/ ) {
            $line_character = 'P';
            if ( $rOpts->{'pod2html'} ) {
                my $html_pod_fh = $self->{_html_pod_fh};
                if ( $line_type eq 'POD_START' ) {

                    my $rpre_string_stack = $self->{_rpre_string_stack};
                    my $rpre_string       = $rpre_string_stack->[-1];

                    # if we have written any non-blank lines to the
                    # current pre section, start writing to a new output
                    # string
                    if ( $$rpre_string =~ /\S/ ) {
                        my $pre_string;
                        $html_pre_fh =
                          Perl::Tidy::IOScalar->new( \$pre_string, 'w' );
                        $self->{_html_pre_fh} = $html_pre_fh;
                        push @$rpre_string_stack, \$pre_string;

                        # leave a marker in the pod stream so we know
                        # where to put the pre section we just
                        # finished.
                        # NOTE(review): the marker text was stripped from
                        # the copy under review; reconstructed here and it
                        # must match whatever the pod post-processing
                        # searches for - confirm.
                        my $for_html = '=for html';    # don't confuse pod utils
                        $html_pod_fh->print(<<EOM);

$for_html
<!-- pERLTIDY sECTION -->

EOM
                    }

                    # otherwise, just clear the current string and start
                    # over
                    else {
                        $$rpre_string = "";
                        $html_pod_fh->print("\n");
                    }
                }
                $html_pod_fh->print( $input_line . "\n" );
                if ( $line_type eq 'POD_END' ) {
                    $self->{_pod_cut_count}++;
                    $html_pod_fh->print("\n");
                }
                return;
            }
        }
        else { $line_character = 'Q' }
        $html_line = $self->markup_html_element( $input_line, $line_character );
    }

    # add the line number if requested
    if ( $rOpts->{'html-line-numbers'} ) {

        # pad so that line numbers of up to four digits are right-aligned
        my $extra_space =
            ( $line_number < 10 )   ? "   "
          : ( $line_number < 100 )  ? "  "
          : ( $line_number < 1000 ) ? " "
          :                           "";
        $html_line = $extra_space . $line_number . " " . $html_line;
    }

    # write the line
    $html_pre_fh->print("$html_line\n");
}

#####################################################################
#
# The Perl::Tidy::Formatter package adds indentation, whitespace, and
# line breaks to the token stream
#
# WARNING: This is not a real class for speed reasons. Only one
# Formatter may be used.
#
#####################################################################

package Perl::Tidy::Formatter;

BEGIN {

    # Caution: these debug flags produce a lot of output
    # They should all be 0 except when debugging small scripts
    use constant FORMATTER_DEBUG_FLAG_BOND    => 0;
    use constant FORMATTER_DEBUG_FLAG_BREAK   => 0;
    use constant FORMATTER_DEBUG_FLAG_CI      => 0;
    use constant FORMATTER_DEBUG_FLAG_FLUSH   => 0;
    use constant FORMATTER_DEBUG_FLAG_FORCE   => 0;
    use constant FORMATTER_DEBUG_FLAG_LIST    => 0;
    use constant FORMATTER_DEBUG_FLAG_NOBREAK => 0;
    use constant FORMATTER_DEBUG_FLAG_OUTPUT  => 0;
    use constant FORMATTER_DEBUG_FLAG_SPARSE  => 0;
    use constant FORMATTER_DEBUG_FLAG_STORE   => 0;
    use constant FORMATTER_DEBUG_FLAG_UNDOBP  => 0;
    use constant FORMATTER_DEBUG_FLAG_WHITE   => 0;

    # announce each debug flag which is switched on
    my $debug_warning = sub {
        print "FORMATTER_DEBUGGING with key $_[0]\n";
    };

    FORMATTER_DEBUG_FLAG_BOND    && $debug_warning->('BOND');
    FORMATTER_DEBUG_FLAG_BREAK   && $debug_warning->('BREAK');
    FORMATTER_DEBUG_FLAG_CI      && $debug_warning->('CI');
    FORMATTER_DEBUG_FLAG_FLUSH   && $debug_warning->('FLUSH');
    FORMATTER_DEBUG_FLAG_FORCE   && $debug_warning->('FORCE');
    FORMATTER_DEBUG_FLAG_LIST    && $debug_warning->('LIST');
    FORMATTER_DEBUG_FLAG_NOBREAK && $debug_warning->('NOBREAK');
    FORMATTER_DEBUG_FLAG_OUTPUT  && $debug_warning->('OUTPUT');
    FORMATTER_DEBUG_FLAG_SPARSE  && $debug_warning->('SPARSE');
    FORMATTER_DEBUG_FLAG_STORE   && $debug_warning->('STORE');
    FORMATTER_DEBUG_FLAG_UNDOBP  && $debug_warning->('UNDOBP');
    FORMATTER_DEBUG_FLAG_WHITE   && $debug_warning->('WHITE');
}

use Carp;

# Package globals of the formatter.  (A duplicated mention of
# %is_closing_type and %is_opening_type has been removed from this list.)
use vars qw{

  @gnu_stack
  $max_gnu_stack_index
  $gnu_position_predictor
  $line_start_index_to_go
  $last_indentation_written
  $last_unadjusted_indentation
  $last_leading_token

  $saw_VERSION_in_this_file
  $saw_END_or_DATA_

  @gnu_item_list
  $max_gnu_item_index
  $gnu_sequence_number
  $last_output_indentation
  %last_gnu_equals
  %gnu_comma_count
  %gnu_arrow_count

  @block_type_to_go
  @type_sequence_to_go
  @container_environment_to_go
  @bond_strength_to_go
  @forced_breakpoint_to_go
  @lengths_to_go
  @levels_to_go
  @leading_spaces_to_go
  @reduced_spaces_to_go
  @matching_token_to_go
  @mate_index_to_go
  @nesting_blocks_to_go
  @ci_levels_to_go
  @nesting_depth_to_go
  @nobreak_to_go
  @old_breakpoint_to_go
  @tokens_to_go
  @types_to_go

  %saved_opening_indentation

  $max_index_to_go
  $comma_count_in_batch
  $old_line_count_in_batch
  $last_nonblank_index_to_go
  $last_nonblank_type_to_go
  $last_nonblank_token_to_go
  $last_last_nonblank_index_to_go
  $last_last_nonblank_type_to_go
  $last_last_nonblank_token_to_go
  @nonblank_lines_at_depth
  $starting_in_quote
  $ending_in_quote

  $in_format_skipping_section
  $format_skipping_pattern_begin
  $format_skipping_pattern_end

  $forced_breakpoint_count
  $forced_breakpoint_undo_count
  @forced_breakpoint_undo_stack
  %postponed_breakpoint

  $tabbing
  $embedded_tab_count
  $first_embedded_tab_at
  $last_embedded_tab_at
  $deleted_semicolon_count
  $first_deleted_semicolon_at
  $last_deleted_semicolon_at
  $added_semicolon_count
  $first_added_semicolon_at
  $last_added_semicolon_at
  $first_tabbing_disagreement
  $last_tabbing_disagreement
  $in_tabbing_disagreement
  $tabbing_disagreement_count
  $input_line_tabbing

  $last_line_type
  $last_line_leading_type
  $last_line_leading_level
  $last_last_line_leading_level

  %block_leading_text
  %block_opening_line_number
  $csc_new_statement_ok
  $accumulating_text_for_block
  $leading_block_text
  $rleading_block_if_elsif_text
  $leading_block_text_level
  $leading_block_text_length_exceeded
  $leading_block_text_line_length
  $leading_block_text_line_number
  $closing_side_comment_prefix_pattern
  $closing_side_comment_list_pattern

  $last_nonblank_token
  $last_nonblank_type
  $last_last_nonblank_token
  $last_last_nonblank_type
  $last_nonblank_block_type
  $last_output_level
  %is_do_follower
  %is_if_brace_follower
  %space_after_keyword
  $rbrace_follower
  $looking_for_else
  %is_last_next_redo_return
  %is_other_brace_follower
  %is_else_brace_follower
  %is_anon_sub_brace_follower
  %is_anon_sub_1_brace_follower
  %is_sort_map_grep
  %is_sort_map_grep_eval
  %is_sort_map_grep_eval_do
  %is_block_without_semicolon
  %is_if_unless
  %is_and_or
  %is_assignment
  %is_chain_operator
  %is_if_unless_and_or_last_next_redo_return
  %is_until_while_for_if_elsif_else

  @has_broken_sublist
  @dont_align
  @want_comma_break

  $is_static_block_comment
  $index_start_one_line_block
  $semicolons_before_block_self_destruct
  $index_max_forced_break
  $input_line_number
  $diagnostics_object
  $vertical_aligner_object
  $logger_object
  $file_writer_object
  $formatter_self
  @ci_stack
  $last_line_had_side_comment
  %want_break_before
  %outdent_keyword
  $static_block_comment_pattern
  $static_side_comment_pattern
  %opening_vertical_tightness
  %closing_vertical_tightness
  %closing_token_indentation

  %opening_token_right
  %stack_opening_token
  %stack_closing_token

  $block_brace_vertical_tightness_pattern

  $rOpts_add_newlines
  $rOpts_add_whitespace
  $rOpts_block_brace_tightness
  $rOpts_block_brace_vertical_tightness
  $rOpts_brace_left_and_indent
  $rOpts_comma_arrow_breakpoints
  $rOpts_break_at_old_keyword_breakpoints
  $rOpts_break_at_old_comma_breakpoints
  $rOpts_break_at_old_logical_breakpoints
  $rOpts_break_at_old_ternary_breakpoints
  $rOpts_closing_side_comment_else_flag
  $rOpts_closing_side_comment_maximum_text
  $rOpts_continuation_indentation
  $rOpts_cuddled_else
  $rOpts_delete_old_whitespace
  $rOpts_fuzzy_line_length
  $rOpts_indent_columns
  $rOpts_line_up_parentheses
  $rOpts_maximum_fields_per_table
  $rOpts_maximum_line_length
  $rOpts_short_concatenation_item_length
  $rOpts_keep_old_blank_lines
  $rOpts_ignore_old_breakpoints
  $rOpts_format_skipping
  $rOpts_space_function_paren
  $rOpts_space_keyword_paren
  $rOpts_keep_interior_semicolons

  $half_maximum_line_length

  %is_opening_type
  %is_closing_type
  %is_keyword_returning_list
  %tightness
  %matching_token
  $rOpts
  %right_bond_strength
  %left_bond_strength
  %binary_ws_rules
  %want_left_space
  %want_right_space
  %is_digraph
  %is_trigraph
  $bli_pattern
  $bli_list_string
  %is_closing_token
  %is_opening_token
};

BEGIN {

    # Fill the constant lookup tables.  Each table is a hash whose keys are
    # the listed words and whose values are all 1.

    # default list of block types for which -bli would apply
    $bli_list_string = 'if else elsif unless while for foreach do : sub';

    @_ = qw(
      .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
      <= >= == =~ !~ != ++ -- /= x=
    );
    @is_digraph{@_} = (1) x scalar(@_);

    @_ = qw( ... **= <<= >>= &&= ||= //= <=> );
    @is_trigraph{@_} = (1) x scalar(@_);

    @_ = qw(
      = **= += *= &= <<= &&=
      -= /= |= >>= ||= //=
      .= %= ^=
      x=
    );
    @is_assignment{@_} = (1) x scalar(@_);

    @_ = qw(
      grep
      keys
      map
      reverse
      sort
      split
    );
    @is_keyword_returning_list{@_} = (1) x scalar(@_);

    # NOTE(review): the leading 'is' looks like a stray word; it is kept
    # because removing it would change the table contents - confirm.
    @_ = qw(is if unless and or err last next redo return);
    @is_if_unless_and_or_last_next_redo_return{@_} = (1) x scalar(@_);

    # always break after a closing curly of these block types:
    @_ = qw(until while for if elsif else);
    @is_until_while_for_if_elsif_else{@_} = (1) x scalar(@_);

    @_ = qw(last next redo return);
    @is_last_next_redo_return{@_} = (1) x scalar(@_);

    @_ = qw(sort map grep);
    @is_sort_map_grep{@_} = (1) x scalar(@_);

    @_ = qw(sort map grep eval);
    @is_sort_map_grep_eval{@_} = (1) x scalar(@_);

    @_ = qw(sort map grep eval do);
    @is_sort_map_grep_eval_do{@_} = (1) x scalar(@_);

    @_ = qw(if unless);
    @is_if_unless{@_} = (1) x scalar(@_);

    @_ = qw(and or err);
    @is_and_or{@_} = (1) x scalar(@_);

    # Identify certain operators which often occur in chains.
    # Note: the minus (-) causes a side effect of padding of the first line in
    # something like this (by sub set_logical_padding):
    #    Checkbutton => 'Transmission checked',
    #   -variable    => \$TRANS
    # This usually improves appearance so it seems ok.
    @_ = qw(&& || and or : ? . + - * /);
    @is_chain_operator{@_} = (1) x scalar(@_);

    # We can remove semicolons after blocks preceded by these keywords
    @_ =
      qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
      unless while until for foreach);
    @is_block_without_semicolon{@_} = (1) x scalar(@_);

    # 'L' is token for opening { at hash key
    @_ = qw" L { ( [ ";
    @is_opening_type{@_} = (1) x scalar(@_);

    # 'R' is token for closing } at hash key
    @_ = qw" R } ) ] ";
    @is_closing_type{@_} = (1) x scalar(@_);

    @_ = qw" { ( [ ";
    @is_opening_token{@_} = (1) x scalar(@_);

    @_ = qw" } ) ] ";
    @is_closing_token{@_} = (1) x scalar(@_);
}

# whitespace codes
use constant WS_YES      => 1;
use constant WS_OPTIONAL => 0;
use constant WS_NO       => -1;

# Token bond strengths.
use constant NO_BREAK    => 10000;
use constant VERY_STRONG => 100;
use constant STRONG      => 2.1;
use constant NOMINAL     => 1.1;
use constant WEAK        => 0.8;
use constant VERY_WEAK   => 0.55;

# values for testing indexes in output array
use constant UNDEFINED_INDEX => -1;

# Maximum number of little messages; probably need not be changed.
use constant MAX_NAG_MESSAGES => 6;

# increment between sequence numbers for each type
# For example, ?: pairs might have numbers 7,11,15,...
use constant TYPE_SEQUENCE_INCREMENT => 4; { # methods to count instances my $_count = 0; sub get_count { $_count; } sub _increment_count { ++$_count } sub _decrement_count { --$_count } } sub trim { # trim leading and trailing whitespace from a string $_[0] =~ s/\s+$//; $_[0] =~ s/^\s+//; return $_[0]; } sub split_words { # given a string containing words separated by whitespace, # return the list of words my ($str) = @_; return unless $str; $str =~ s/\s+$//; $str =~ s/^\s+//; return split( /\s+/, $str ); } # interface to Perl::Tidy::Logger routines sub warning { if ($logger_object) { $logger_object->warning(@_); } } sub complain { if ($logger_object) { $logger_object->complain(@_); } } sub write_logfile_entry { if ($logger_object) { $logger_object->write_logfile_entry(@_); } } sub black_box { if ($logger_object) { $logger_object->black_box(@_); } } sub report_definite_bug { if ($logger_object) { $logger_object->report_definite_bug(); } } sub get_saw_brace_error { if ($logger_object) { $logger_object->get_saw_brace_error(); } } sub we_are_at_the_last_line { if ($logger_object) { $logger_object->we_are_at_the_last_line(); } } # interface to Perl::Tidy::Diagnostics routine sub write_diagnostics { if ($diagnostics_object) { $diagnostics_object->write_diagnostics(@_); } } sub get_added_semicolon_count { my $self = shift; return $added_semicolon_count; } sub DESTROY { $_[0]->_decrement_count(); } sub new { my $class = shift; # we are given an object with a write_line() method to take lines my %defaults = ( sink_object => undef, diagnostics_object => undef, logger_object => undef, ); my %args = ( %defaults, @_ ); $logger_object = $args{logger_object}; $diagnostics_object = $args{diagnostics_object}; # we create another object with a get_line() and peek_ahead() method my $sink_object = $args{sink_object}; $file_writer_object = Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object ); # initialize the leading whitespace stack to negative levels # so that we can 
never run off the end of the stack $gnu_position_predictor = 0; # where the current token is predicted to be $max_gnu_stack_index = 0; $max_gnu_item_index = -1; $gnu_stack[0] = new_lp_indentation_item( 0, -1, -1, 0, 0 ); @gnu_item_list = (); $last_output_indentation = 0; $last_indentation_written = 0; $last_unadjusted_indentation = 0; $last_leading_token = ""; $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'}; $saw_END_or_DATA_ = 0; @block_type_to_go = (); @type_sequence_to_go = (); @container_environment_to_go = (); @bond_strength_to_go = (); @forced_breakpoint_to_go = (); @lengths_to_go = (); # line length to start of ith token @levels_to_go = (); @matching_token_to_go = (); @mate_index_to_go = (); @nesting_blocks_to_go = (); @ci_levels_to_go = (); @nesting_depth_to_go = (0); @nobreak_to_go = (); @old_breakpoint_to_go = (); @tokens_to_go = (); @types_to_go = (); @leading_spaces_to_go = (); @reduced_spaces_to_go = (); @dont_align = (); @has_broken_sublist = (); @want_comma_break = (); @ci_stack = (""); $first_tabbing_disagreement = 0; $last_tabbing_disagreement = 0; $tabbing_disagreement_count = 0; $in_tabbing_disagreement = 0; $input_line_tabbing = undef; $last_line_type = ""; $last_last_line_leading_level = 0; $last_line_leading_level = 0; $last_line_leading_type = '#'; $last_nonblank_token = ';'; $last_nonblank_type = ';'; $last_last_nonblank_token = ';'; $last_last_nonblank_type = ';'; $last_nonblank_block_type = ""; $last_output_level = 0; $looking_for_else = 0; $embedded_tab_count = 0; $first_embedded_tab_at = 0; $last_embedded_tab_at = 0; $deleted_semicolon_count = 0; $first_deleted_semicolon_at = 0; $last_deleted_semicolon_at = 0; $added_semicolon_count = 0; $first_added_semicolon_at = 0; $last_added_semicolon_at = 0; $last_line_had_side_comment = 0; $is_static_block_comment = 0; %postponed_breakpoint = (); # variables for adding side comments %block_leading_text = (); %block_opening_line_number = (); $csc_new_statement_ok = 1; 
%saved_opening_indentation = (); $in_format_skipping_section = 0; reset_block_text_accumulator(); prepare_for_new_input_lines(); $vertical_aligner_object = Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object, $logger_object, $diagnostics_object ); if ( $rOpts->{'entab-leading-whitespace'} ) { write_logfile_entry( "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n" ); } elsif ( $rOpts->{'tabs'} ) { write_logfile_entry("Indentation will be with a tab character\n"); } else { write_logfile_entry( "Indentation will be with $rOpts->{'indent-columns'} spaces\n"); } # This was the start of a formatter referent, but object-oriented # coding has turned out to be too slow here. $formatter_self = {}; bless $formatter_self, $class; # Safety check..this is not a class yet if ( _increment_count() > 1 ) { confess "Attempt to create more than 1 object in $class, which is not a true class yet\n"; } return $formatter_self; } sub prepare_for_new_input_lines { $gnu_sequence_number++; # increment output batch counter %last_gnu_equals = (); %gnu_comma_count = (); %gnu_arrow_count = (); $line_start_index_to_go = 0; $max_gnu_item_index = UNDEFINED_INDEX; $index_max_forced_break = UNDEFINED_INDEX; $max_index_to_go = UNDEFINED_INDEX; $last_nonblank_index_to_go = UNDEFINED_INDEX; $last_nonblank_type_to_go = ''; $last_nonblank_token_to_go = ''; $last_last_nonblank_index_to_go = UNDEFINED_INDEX; $last_last_nonblank_type_to_go = ''; $last_last_nonblank_token_to_go = ''; $forced_breakpoint_count = 0; $forced_breakpoint_undo_count = 0; $rbrace_follower = undef; $lengths_to_go[0] = 0; $old_line_count_in_batch = 1; $comma_count_in_batch = 0; $starting_in_quote = 0; destroy_one_line_block(); } sub write_line { my $self = shift; my ($line_of_tokens) = @_; my $line_type = $line_of_tokens->{_line_type}; my $input_line = $line_of_tokens->{_line_text}; # _line_type codes are: # SYSTEM - system-specific code before hash-bang line # CODE - line 
of perl code (including comments) # POD_START - line starting pod, such as '=head' # POD - pod documentation text # POD_END - last line of pod section, '=cut' # HERE - text of here-document # HERE_END - last line of here-doc (target word) # FORMAT - format section # FORMAT_END - last line of format section, '.' # DATA_START - __DATA__ line # DATA - unidentified text following __DATA__ # END_START - __END__ line # END - unidentified text following __END__ # ERROR - we are in big trouble, probably not a perl script # put a blank line after an =cut which comes before __END__ and __DATA__ # (required by podchecker) if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) { $file_writer_object->reset_consecutive_blank_lines(); if ( $input_line !~ /^\s*$/ ) { want_blank_line() } } # handle line of code.. if ( $line_type eq 'CODE' ) { # let logger see all non-blank lines of code if ( $input_line !~ /^\s*$/ ) { my $output_line_number = $vertical_aligner_object->get_output_line_number(); black_box( $line_of_tokens, $output_line_number ); } print_line_of_tokens($line_of_tokens); } # handle line of non-code.. else { # set special flags my $skip_line = 0; my $tee_line = 0; if ( $line_type =~ /^POD/ ) { # Pod docs should have a preceding blank line. But be # very careful in __END__ and __DATA__ sections, because: # 1. the user may be using this section for any purpose whatsoever # 2. the blank counters are not active there # It should be safe to request a blank line between an # __END__ or __DATA__ and an immediately following '=head' # type line, (types END_START and DATA_START), but not for # any other lines of type END or DATA. 
if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; } if ( $rOpts->{'tee-pod'} ) { $tee_line = 1; } if ( !$skip_line && $line_type eq 'POD_START' && $last_line_type !~ /^(END|DATA)$/ ) { want_blank_line(); } } # leave the blank counters in a predictable state # after __END__ or __DATA__ elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) { $file_writer_object->reset_consecutive_blank_lines(); $saw_END_or_DATA_ = 1; } # write unindented non-code line if ( !$skip_line ) { if ($tee_line) { $file_writer_object->tee_on() } write_unindented_line($input_line); if ($tee_line) { $file_writer_object->tee_off() } } } $last_line_type = $line_type; } sub create_one_line_block { $index_start_one_line_block = $_[0]; $semicolons_before_block_self_destruct = $_[1]; } sub destroy_one_line_block { $index_start_one_line_block = UNDEFINED_INDEX; $semicolons_before_block_self_destruct = 0; } sub leading_spaces_to_go { # return the number of indentation spaces for a token in the output stream; # these were previously stored by 'set_leading_whitespace'. return get_SPACES( $leading_spaces_to_go[ $_[0] ] ); } sub get_SPACES { # return the number of leading spaces associated with an indentation # variable $indentation is either a constant number of spaces or an object # with a get_SPACES method. my $indentation = shift; return ref($indentation) ? $indentation->get_SPACES() : $indentation; } sub get_RECOVERABLE_SPACES { # return the number of spaces (+ means shift right, - means shift left) # that we would like to shift a group of lines with the same indentation # to get them to line up with their opening parens my $indentation = shift; return ref($indentation) ? $indentation->get_RECOVERABLE_SPACES() : 0; } sub get_AVAILABLE_SPACES_to_go { my $item = $leading_spaces_to_go[ $_[0] ]; # return the number of available leading spaces associated with an # indentation variable. $indentation is either a constant number of # spaces or an object with a get_AVAILABLE_SPACES method. return ref($item) ? 
$item->get_AVAILABLE_SPACES() : 0; } sub new_lp_indentation_item { # this is an interface to the IndentationItem class my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_; # A negative level implies not to store the item in the item_list my $index = 0; if ( $level >= 0 ) { $index = ++$max_gnu_item_index; } my $item = Perl::Tidy::IndentationItem->new( $spaces, $level, $ci_level, $available_spaces, $index, $gnu_sequence_number, $align_paren, $max_gnu_stack_index, $line_start_index_to_go, ); if ( $level >= 0 ) { $gnu_item_list[$max_gnu_item_index] = $item; } return $item; } sub set_leading_whitespace { # This routine defines leading whitespace # given: the level and continuation_level of a token, # define: space count of leading string which would apply if it # were the first token of a new line. my ( $level, $ci_level, $in_continued_quote ) = @_; # modify for -bli, which adds one continuation indentation for # opening braces if ( $rOpts_brace_left_and_indent && $max_index_to_go == 0 && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o ) { $ci_level++; } # patch to avoid trouble when input file has negative indentation. # other logic should catch this error. if ( $level < 0 ) { $level = 0 } #------------------------------------------- # handle the standard indentation scheme #------------------------------------------- unless ($rOpts_line_up_parentheses) { my $space_count = $ci_level * $rOpts_continuation_indentation + $level * $rOpts_indent_columns; my $ci_spaces = ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation; if ($in_continued_quote) { $space_count = 0; $ci_spaces = 0; } $leading_spaces_to_go[$max_index_to_go] = $space_count; $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces; return; } #------------------------------------------------------------- # handle case of -lp indentation.. 
#------------------------------------------------------------- # The continued_quote flag means that this is the first token of a # line, and it is the continuation of some kind of multi-line quote # or pattern. It requires special treatment because it must have no # added leading whitespace. So we create a special indentation item # which is not in the stack. if ($in_continued_quote) { my $space_count = 0; my $available_space = 0; $level = -1; # flag to prevent storing in item_list $leading_spaces_to_go[$max_index_to_go] = $reduced_spaces_to_go[$max_index_to_go] = new_lp_indentation_item( $space_count, $level, $ci_level, $available_space, 0 ); return; } # get the top state from the stack my $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); my $current_level = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); my $type = $types_to_go[$max_index_to_go]; my $token = $tokens_to_go[$max_index_to_go]; my $total_depth = $nesting_depth_to_go[$max_index_to_go]; if ( $type eq '{' || $type eq '(' ) { $gnu_comma_count{ $total_depth + 1 } = 0; $gnu_arrow_count{ $total_depth + 1 } = 0; # If we come to an opening token after an '=' token of some type, # see if it would be helpful to 'break' after the '=' to save space my $last_equals = $last_gnu_equals{$total_depth}; if ( $last_equals && $last_equals > $line_start_index_to_go ) { # find the position if we break at the '=' my $i_test = $last_equals; if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ } # TESTING ##my $too_close = ($i_test==$max_index_to_go-1); my $test_position = total_line_length( $i_test, $max_index_to_go ); if ( # the equals is not just before an open paren (testing) ##!$too_close && # if we are beyond the midpoint $gnu_position_predictor > $half_maximum_line_length # or we are beyont the 1/4 point and there was an old # break at the equals || ( $gnu_position_predictor > $half_maximum_line_length / 2 && ( 
$old_breakpoint_to_go[$last_equals] || ( $last_equals > 0 && $old_breakpoint_to_go[ $last_equals - 1 ] ) || ( $last_equals > 1 && $types_to_go[ $last_equals - 1 ] eq 'b' && $old_breakpoint_to_go[ $last_equals - 2 ] ) ) ) ) { # then make the switch -- note that we do not set a real # breakpoint here because we may not really need one; sub # scan_list will do that if necessary $line_start_index_to_go = $i_test + 1; $gnu_position_predictor = $test_position; } } } # Check for decreasing depth .. # Note that one token may have both decreasing and then increasing # depth. For example, (level, ci) can go from (1,1) to (2,0). So, # in this example we would first go back to (1,0) then up to (2,0) # in a single call. if ( $level < $current_level || $ci_level < $current_ci_level ) { # loop to find the first entry at or completely below this level my ( $lev, $ci_lev ); while (1) { if ($max_gnu_stack_index) { # save index of token which closes this level $gnu_stack[$max_gnu_stack_index]->set_CLOSED($max_index_to_go); # Undo any extra indentation if we saw no commas my $available_spaces = $gnu_stack[$max_gnu_stack_index]->get_AVAILABLE_SPACES(); my $comma_count = 0; my $arrow_count = 0; if ( $type eq '}' || $type eq ')' ) { $comma_count = $gnu_comma_count{$total_depth}; $arrow_count = $gnu_arrow_count{$total_depth}; $comma_count = 0 unless $comma_count; $arrow_count = 0 unless $arrow_count; } $gnu_stack[$max_gnu_stack_index]->set_COMMA_COUNT($comma_count); $gnu_stack[$max_gnu_stack_index]->set_ARROW_COUNT($arrow_count); if ( $available_spaces > 0 ) { if ( $comma_count <= 0 || $arrow_count > 0 ) { my $i = $gnu_stack[$max_gnu_stack_index]->get_INDEX(); my $seqno = $gnu_stack[$max_gnu_stack_index] ->get_SEQUENCE_NUMBER(); # Be sure this item was created in this batch. This # should be true because we delete any available # space from open items at the end of each batch. if ( $gnu_sequence_number != $seqno || $i > $max_gnu_item_index ) { warning( "Program bug with -lp. 
seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n" ); report_definite_bug(); } else { if ( $arrow_count == 0 ) { $gnu_item_list[$i] ->permanently_decrease_AVAILABLE_SPACES( $available_spaces); } else { $gnu_item_list[$i] ->tentatively_decrease_AVAILABLE_SPACES( $available_spaces); } my $j; for ( $j = $i + 1 ; $j <= $max_gnu_item_index ; $j++ ) { $gnu_item_list[$j] ->decrease_SPACES($available_spaces); } } } } # go down one level --$max_gnu_stack_index; $lev = $gnu_stack[$max_gnu_stack_index]->get_LEVEL(); $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_CI_LEVEL(); # stop when we reach a level at or below the current level if ( $lev <= $level && $ci_lev <= $ci_level ) { $space_count = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); $current_level = $lev; $current_ci_level = $ci_lev; last; } } # reached bottom of stack .. should never happen because # only negative levels can get here, and $level was forced # to be positive above. else { warning( "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n" ); report_definite_bug(); last; } } } # handle increasing depth if ( $level > $current_level || $ci_level > $current_ci_level ) { # Compute the standard incremental whitespace. This will be # the minimum incremental whitespace that will be used. This # choice results in a smooth transition between the gnu-style # and the standard style. my $standard_increment = ( $level - $current_level ) * $rOpts_indent_columns + ( $ci_level - $current_ci_level ) * $rOpts_continuation_indentation; # Now we have to define how much extra incremental space # ("$available_space") we want. This extra space will be # reduced as necessary when long lines are encountered or when # it becomes clear that we do not have a good list. my $available_space = 0; my $align_paren = 0; my $excess = 0; # initialization on empty stack.. 
if ( $max_gnu_stack_index == 0 ) { $space_count = $level * $rOpts_indent_columns; } # if this is a BLOCK, add the standard increment elsif ($last_nonblank_block_type) { $space_count += $standard_increment; } # if last nonblank token was not structural indentation, # just use standard increment elsif ( $last_nonblank_type ne '{' ) { $space_count += $standard_increment; } # otherwise use the space to the first non-blank level change token else { $space_count = $gnu_position_predictor; my $min_gnu_indentation = $gnu_stack[$max_gnu_stack_index]->get_SPACES(); $available_space = $space_count - $min_gnu_indentation; if ( $available_space >= $standard_increment ) { $min_gnu_indentation += $standard_increment; } elsif ( $available_space > 1 ) { $min_gnu_indentation += $available_space + 1; } elsif ( $last_nonblank_token =~ /^[\{\[\(]$/ ) { if ( ( $tightness{$last_nonblank_token} < 2 ) ) { $min_gnu_indentation += 2; } else { $min_gnu_indentation += 1; } } else { $min_gnu_indentation += $standard_increment; } $available_space = $space_count - $min_gnu_indentation; if ( $available_space < 0 ) { $space_count = $min_gnu_indentation; $available_space = 0; } $align_paren = 1; } # update state, but not on a blank token if ( $types_to_go[$max_index_to_go] ne 'b' ) { $gnu_stack[$max_gnu_stack_index]->set_HAVE_CHILD(1); ++$max_gnu_stack_index; $gnu_stack[$max_gnu_stack_index] = new_lp_indentation_item( $space_count, $level, $ci_level, $available_space, $align_paren ); # If the opening paren is beyond the half-line length, then # we will use the minimum (standard) indentation. This will # help avoid problems associated with running out of space # near the end of a line. As a result, in deeply nested # lists, there will be some indentations which are limited # to this minimum standard indentation. But the most deeply # nested container will still probably be able to shift its # parameters to the right for proper alignment, so in most # cases this will not be noticable. 
if ( $available_space > 0 && $space_count > $half_maximum_line_length ) { $gnu_stack[$max_gnu_stack_index] ->tentatively_decrease_AVAILABLE_SPACES($available_space); } } } # Count commas and look for non-list characters. Once we see a # non-list character, we give up and don't look for any more commas. if ( $type eq '=>' ) { $gnu_arrow_count{$total_depth}++; # tentatively treating '=>' like '=' for estimating breaks # TODO: this could use some experimentation $last_gnu_equals{$total_depth} = $max_index_to_go; } elsif ( $type eq ',' ) { $gnu_comma_count{$total_depth}++; } elsif ( $is_assignment{$type} ) { $last_gnu_equals{$total_depth} = $max_index_to_go; } # this token might start a new line # if this is a non-blank.. if ( $type ne 'b' ) { # and if .. if ( # this is the first nonblank token of the line $max_index_to_go == 1 && $types_to_go[0] eq 'b' # or previous character was one of these: || $last_nonblank_type_to_go =~ /^([\:\?\,f])$/ # or previous character was opening and this does not close it || ( $last_nonblank_type_to_go eq '{' && $type ne '}' ) || ( $last_nonblank_type_to_go eq '(' and $type ne ')' ) # or this token is one of these: || $type =~ /^([\.]|\|\||\&\&)$/ # or this is a closing structure || ( $last_nonblank_type_to_go eq '}' && $last_nonblank_token_to_go eq $last_nonblank_type_to_go ) # or previous token was keyword 'return' || ( $last_nonblank_type_to_go eq 'k' && ( $last_nonblank_token_to_go eq 'return' && $type ne '{' ) ) # or starting a new line at certain keywords is fine || ( $type eq 'k' && $is_if_unless_and_or_last_next_redo_return{$token} ) # or this is after an assignment after a closing structure || ( $is_assignment{$last_nonblank_type_to_go} && ( $last_last_nonblank_type_to_go =~ /^[\}\)\]]$/ # and it is significantly to the right || $gnu_position_predictor > $half_maximum_line_length ) ) ) { check_for_long_gnu_style_lines(); $line_start_index_to_go = $max_index_to_go; # back up 1 token if we want to break before that type # 
otherwise, we may strand tokens like '?' or ':' on a line if ( $line_start_index_to_go > 0 ) { if ( $last_nonblank_type_to_go eq 'k' ) { if ( $want_break_before{$last_nonblank_token_to_go} ) { $line_start_index_to_go--; } } elsif ( $want_break_before{$last_nonblank_type_to_go} ) { $line_start_index_to_go--; } } } } # remember the predicted position of this token on the output line if ( $max_index_to_go > $line_start_index_to_go ) { $gnu_position_predictor = total_line_length( $line_start_index_to_go, $max_index_to_go ); } else { $gnu_position_predictor = $space_count + token_sequence_length( $max_index_to_go, $max_index_to_go ); } # store the indentation object for this token # this allows us to manipulate the leading whitespace # (in case we have to reduce indentation to fit a line) without # having to change any token values $leading_spaces_to_go[$max_index_to_go] = $gnu_stack[$max_gnu_stack_index]; $reduced_spaces_to_go[$max_index_to_go] = ( $max_gnu_stack_index > 0 && $ci_level ) ? $gnu_stack[ $max_gnu_stack_index - 1 ] : $gnu_stack[$max_gnu_stack_index]; return; }

sub check_for_long_gnu_style_lines {

    # Look at the current estimated maximum line length
    # ($gnu_position_predictor) and give back leading whitespace from the
    # -lp indentation items of this batch if the estimate exceeds the
    # desired maximum.
    #
    # Takes no arguments.  Works on the file-scoped -lp state:
    #   reads   $rOpts_line_up_parentheses, $rOpts_maximum_line_length,
    #           $max_gnu_item_index, @gnu_item_list
    #   updates the items in @gnu_item_list and $gnu_position_predictor
    # The return value is not meaningful.

    # this is only for the '-lp' style
    return unless ($rOpts_line_up_parentheses);

    # nothing can be done if no stack items defined for this line
    return if ( $max_gnu_item_index == UNDEFINED_INDEX );

    # see if we have exceeded the maximum desired line length
    # keep 2 extra free because they are needed in some cases
    # (result of trial-and-error testing)
    my $spaces_needed =
      $gnu_position_predictor - $rOpts_maximum_line_length + 2;

    return if ( $spaces_needed <= 0 );

    # We are over the limit, so try to remove a requested number of
    # spaces from leading whitespace.  We are only allowed to remove
    # from whitespace items created on this batch, since others have
    # already been used and cannot be undone.
    my @candidates = ();

    # loop over all whitespace items created for the current batch
    foreach my $index ( 0 .. $max_gnu_item_index ) {
        my $item = $gnu_item_list[$index];

        # item must still be open to be a candidate (otherwise it
        # cannot influence the current token)
        next if ( $item->get_CLOSED() >= 0 );

        my $available_spaces = $item->get_AVAILABLE_SPACES();
        if ( $available_spaces > 0 ) {
            push( @candidates, [ $index, $available_spaces ] );
        }
    }

    return unless (@candidates);

    # sort by available whitespace so that we can remove whitespace
    # from the maximum available first
    @candidates = sort { $b->[1] <=> $a->[1] } @candidates;

    # keep removing whitespace until we are done or have no more
    foreach my $candidate (@candidates) {
        my ( $i_first, $available_spaces ) = @{$candidate};
        my $deleted_spaces =
          ( $available_spaces > $spaces_needed )
          ? $spaces_needed
          : $available_spaces;

        # remove the incremental space from this item
        $gnu_item_list[$i_first]->decrease_AVAILABLE_SPACES($deleted_spaces);

        # update the leading whitespace of this item and all items
        # that came after it
        foreach my $i ( $i_first .. $max_gnu_item_index ) {

            my $old_spaces = $gnu_item_list[$i]->get_SPACES();
            if ( $old_spaces >= $deleted_spaces ) {
                $gnu_item_list[$i]->decrease_SPACES($deleted_spaces);
            }

            # shouldn't happen except for code bug:
            else {

                # $level/$ci_level describe the item whose space is being
                # given back; $old_level/$old_ci_level describe the later
                # item that is too short to absorb the reduction.
                my $level        = $gnu_item_list[$i_first]->get_LEVEL();
                my $ci_level     = $gnu_item_list[$i_first]->get_CI_LEVEL();
                my $old_level    = $gnu_item_list[$i]->get_LEVEL();
                my $old_ci_level = $gnu_item_list[$i]->get_CI_LEVEL();
                warning(
"program bug with -lp: want to delete $deleted_spaces from item $i, but old=$old_spaces deleted: lev=$level ci=$ci_level deleted: level=$old_level ci=$old_ci_level\n"
                );
                report_definite_bug();
            }
        }
        $gnu_position_predictor -= $deleted_spaces;
        $spaces_needed          -= $deleted_spaces;
        last unless ( $spaces_needed > 0 );
    }
    return;
}

sub finish_lp_batch { # This routine is called once after each each output stream batch is # finished to undo indentation for all 
incomplete -lp # indentation levels. It is too risky to leave a level open, # because then we can't backtrack in case of a long line to follow. # This means that comments and blank lines will disrupt this # indentation style. But the vertical aligner may be able to # get the space back if there are side comments. # this is only for the 'lp' style return unless ($rOpts_line_up_parentheses); # nothing can be done if no stack items defined for this line return if ( $max_gnu_item_index == UNDEFINED_INDEX ); # loop over all whitespace items created for the current batch my $i; for ( $i = 0 ; $i <= $max_gnu_item_index ; $i++ ) { my $item = $gnu_item_list[$i]; # only look for open items next if ( $item->get_CLOSED() >= 0 ); # Tentatively remove all of the available space # (The vertical aligner will try to get it back later) my $available_spaces = $item->get_AVAILABLE_SPACES(); if ( $available_spaces > 0 ) { # delete incremental space for this item $gnu_item_list[$i] ->tentatively_decrease_AVAILABLE_SPACES($available_spaces); # Reduce the total indentation space of any nodes that follow # Note that any such nodes must necessarily be dependents # of this node. foreach ( $i + 1 .. 
$max_gnu_item_index ) { $gnu_item_list[$_]->decrease_SPACES($available_spaces); } } } return; }

sub reduce_lp_indentation {

    # Try to take $spaces_wanted columns of leading whitespace away from
    # the indentation item attached to token $i, and return the value
    # reported by the item's tentatively_decrease_AVAILABLE_SPACES method
    # (0 if nothing was attempted).
    #
    # The reduction is attempted only if the item has available space and
    # either the request fits within it or the item has no child item.
    #
    # NOTE: to be called from scan_list only for a sequence of tokens
    # contained between opening and closing parens/braces/brackets
    my ( $i, $spaces_wanted ) = @_;

    my $item             = $leading_spaces_to_go[$i];
    my $available_spaces = $item->get_AVAILABLE_SPACES();

    return 0 unless ( $available_spaces > 0 );
    return 0
      if ( $spaces_wanted > $available_spaces && $item->get_HAVE_CHILD() );

    # we'll remove these spaces, but mark them as recoverable
    my $deleted_spaces =
      $item->tentatively_decrease_AVAILABLE_SPACES($spaces_wanted);
    return $deleted_spaces;
}

sub token_sequence_length {

    # Return the summed length of tokens $ifirst .. $ilast (both ends
    # included), taken from the cumulative table @lengths_to_go.
    # Returns 0 for an empty range ($ilast < 0 or $ifirst > $ilast).
    # A negative $ifirst means "measure from the start of the table".
    my ( $ifirst, $ilast ) = @_;

    return 0 if ( $ilast < 0 || $ifirst > $ilast );

    my $length_through_last = $lengths_to_go[ $ilast + 1 ];
    return ( $ifirst < 0 )
      ? $length_through_last
      : $length_through_last - $lengths_to_go[$ifirst];
}

sub total_line_length {

    # Return the length of a line made of tokens $ifirst .. $ilast:
    # the leading spaces of the first token plus the token lengths.
    # A negative $ifirst is treated as 0.
    my ( $ifirst, $ilast ) = @_;

    $ifirst = 0 if ( $ifirst < 0 );
    return leading_spaces_to_go($ifirst) +
      token_sequence_length( $ifirst, $ilast );
}

sub excess_line_length { # return number of characters by which a line of tokens ($ifirst..$ilast) # exceeds the allowable line length. 
my $ifirst = shift; my $ilast = shift; if ( $ifirst < 0 ) { $ifirst = 0 } return leading_spaces_to_go($ifirst) + token_sequence_length( $ifirst, $ilast ) - $rOpts_maximum_line_length; }

sub finish_formatting {

    # Flush the output buffer and then write the end-of-run summary to
    # the logfile: semicolons added, semicolons deleted, quotes/patterns
    # with embedded tabs, and indentation disagreements.  Finally ask the
    # vertical aligner and the file writer for their own reports.
    my $self = shift;

    flush();

    # fix up line number since it was incremented
    $file_writer_object->decrement_output_line_number();
    we_are_at_the_last_line();

    if ( $added_semicolon_count > 0 ) {
        my $several = ( $added_semicolon_count > 1 );
        my $first   = $several ? "First" : "";
        my $what    = $several ? "semicolons were" : "semicolon was";
        write_logfile_entry("$added_semicolon_count $what added:\n");
        write_logfile_entry(
            " $first at input line $first_added_semicolon_at\n");
        write_logfile_entry(
            " Last at input line $last_added_semicolon_at\n")
          if ($several);
        write_logfile_entry(" (Use -nasc to prevent semicolon addition)\n");
        write_logfile_entry("\n");
    }

    if ( $deleted_semicolon_count > 0 ) {
        my $several = ( $deleted_semicolon_count > 1 );
        my $first   = $several ? "First" : "";
        my $what    = $several ? "semicolons were" : "semicolon was";
        write_logfile_entry(
            "$deleted_semicolon_count unnecessary $what deleted:\n");
        write_logfile_entry(
            " $first at input line $first_deleted_semicolon_at\n");
        write_logfile_entry(
            " Last at input line $last_deleted_semicolon_at\n")
          if ($several);
        write_logfile_entry(" (Use -ndsc to prevent semicolon deletion)\n");
        write_logfile_entry("\n");
    }

    if ( $embedded_tab_count > 0 ) {
        my $several = ( $embedded_tab_count > 1 );
        my $first   = $several ? "First" : "";
        my $what    = $several ? "quotes or patterns" : "quote or pattern";
        write_logfile_entry("$embedded_tab_count $what had embedded tabs:\n");
        write_logfile_entry(
"This means the display of this script could vary with device or software\n"
        );
        write_logfile_entry(" $first at input line $first_embedded_tab_at\n");
        write_logfile_entry(
            " Last at input line $last_embedded_tab_at\n")
          if ($several);
        write_logfile_entry("\n");
    }

    if ($first_tabbing_disagreement) {
        write_logfile_entry(
"First indentation disagreement seen at input line $first_tabbing_disagreement\n"
        );
    }

    # exactly one of the next three messages is written
    if ($in_tabbing_disagreement) {
        write_logfile_entry(
"Ending with indentation disagreement which started at input line $in_tabbing_disagreement\n"
        );
    }
    elsif ($last_tabbing_disagreement) {
        write_logfile_entry(
"Last indentation disagreement seen at input line $last_tabbing_disagreement\n"
        );
    }
    else {
        write_logfile_entry("No indentation disagreement seen\n");
    }
    write_logfile_entry("\n");

    $vertical_aligner_object->report_anything_unusual();

    $file_writer_object->report_line_length_errors();
}

sub check_options { # This routine is called to check the Opts hash after it is defined ($rOpts) = @_; my ( $tabbing_string, $tab_msg ); make_static_block_comment_pattern(); make_static_side_comment_pattern(); make_closing_side_comment_prefix(); make_closing_side_comment_list_pattern(); $format_skipping_pattern_begin = make_format_skipping_pattern( 'format-skipping-begin', '#<<<' ); $format_skipping_pattern_end = make_format_skipping_pattern( 'format-skipping-end', '#>>>' ); # If closing side comments ARE selected, then we can safely # delete old closing side comments unless closing side comment # warnings are requested. This is a good idea because it will # eliminate any old csc's which fall below the line count threshold. # We cannot do this if warnings are turned on, though, because we # might delete some text which has been added. So that must # be handled when comments are created. 
if ( $rOpts->{'closing-side-comments'} ) { if ( !$rOpts->{'closing-side-comment-warnings'} ) { $rOpts->{'delete-closing-side-comments'} = 1; } } # If closing side comments ARE NOT selected, but warnings ARE # selected and we ARE DELETING csc's, then we will pretend to be # adding with a huge interval. This will force the comments to be # generated for comparison with the old comments, but not added. elsif ( $rOpts->{'closing-side-comment-warnings'} ) { if ( $rOpts->{'delete-closing-side-comments'} ) { $rOpts->{'delete-closing-side-comments'} = 0; $rOpts->{'closing-side-comments'} = 1; $rOpts->{'closing-side-comment-interval'} = 100000000; } } make_bli_pattern(); make_block_brace_vertical_tightness_pattern(); if ( $rOpts->{'line-up-parentheses'} ) { if ( $rOpts->{'indent-only'} || !$rOpts->{'add-newlines'} || !$rOpts->{'delete-old-newlines'} ) { warn <{'line-up-parentheses'} = 0; } } # At present, tabs are not compatable with the line-up-parentheses style # (it would be possible to entab the total leading whitespace # just prior to writing the line, if desired). if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) { warn <{'tabs'} = 0; } # Likewise, tabs are not compatable with outdenting.. 
if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) { warn <{'tabs'} = 0; } if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) { warn <{'tabs'} = 0; } if ( !$rOpts->{'space-for-semicolon'} ) { $want_left_space{'f'} = -1; } if ( $rOpts->{'space-terminal-semicolon'} ) { $want_left_space{';'} = 1; } # implement outdenting preferences for keywords %outdent_keyword = (); unless ( @_ = split_words( $rOpts->{'outdent-keyword-okl'} ) ) { @_ = qw(next last redo goto return); # defaults } # FUTURE: if not a keyword, assume that it is an identifier foreach (@_) { if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) { $outdent_keyword{$_} = 1; } else { warn "ignoring '$_' in -okwl list; not a perl keyword"; } } # implement user whitespace preferences if ( @_ = split_words( $rOpts->{'want-left-space'} ) ) { @want_left_space{@_} = (1) x scalar(@_); } if ( @_ = split_words( $rOpts->{'want-right-space'} ) ) { @want_right_space{@_} = (1) x scalar(@_); } if ( @_ = split_words( $rOpts->{'nowant-left-space'} ) ) { @want_left_space{@_} = (-1) x scalar(@_); } if ( @_ = split_words( $rOpts->{'nowant-right-space'} ) ) { @want_right_space{@_} = (-1) x scalar(@_); } if ( $rOpts->{'dump-want-left-space'} ) { dump_want_left_space(*STDOUT); exit 1; } if ( $rOpts->{'dump-want-right-space'} ) { dump_want_right_space(*STDOUT); exit 1; } # default keywords for which space is introduced before an opening paren # (at present, including them messes up vertical alignment) @_ = qw(my local our and or err eq ne if else elsif until unless while for foreach return switch case given when); @space_after_keyword{@_} = (1) x scalar(@_); # allow user to modify these defaults if ( @_ = split_words( $rOpts->{'space-after-keyword'} ) ) { @space_after_keyword{@_} = (1) x scalar(@_); } if ( @_ = split_words( $rOpts->{'nospace-after-keyword'} ) ) { @space_after_keyword{@_} = (0) x scalar(@_); } # implement user break preferences my @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= 
/= |= >>= ||= //= .= %= ^= x= . : ? && || and or err xor ); my $break_after = sub { foreach my $tok (@_) { if ( $tok eq '?' ) { $tok = ':' } # patch to coordinate ?/: my $lbs = $left_bond_strength{$tok}; my $rbs = $right_bond_strength{$tok}; if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) { ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = ( $lbs, $rbs ); } } }; my $break_before = sub { foreach my $tok (@_) { my $lbs = $left_bond_strength{$tok}; my $rbs = $right_bond_strength{$tok}; if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) { ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) = ( $lbs, $rbs ); } } }; $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} ); $break_before->(@all_operators) if ( $rOpts->{'break-before-all-operators'} ); $break_after->( split_words( $rOpts->{'want-break-after'} ) ); $break_before->( split_words( $rOpts->{'want-break-before'} ) ); # make note if breaks are before certain key types %want_break_before = (); foreach my $tok ( @all_operators, ',' ) { $want_break_before{$tok} = $left_bond_strength{$tok} < $right_bond_strength{$tok}; } # Coordinate ?/: breaks, which must be similar if ( !$want_break_before{':'} ) { $want_break_before{'?'} = $want_break_before{':'}; $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01; $left_bond_strength{'?'} = NO_BREAK; } # Define here tokens which may follow the closing brace of a do statement # on the same line, as in: # } while ( $something); @_ = qw(until while unless if ; : ); push @_, ','; @is_do_follower{@_} = (1) x scalar(@_); # These tokens may follow the closing brace of an if or elsif block. 
# In other words, for cuddled else we want code to look like: # } elsif ( $something) { # } else { if ( $rOpts->{'cuddled-else'} ) { @_ = qw(else elsif); @is_if_brace_follower{@_} = (1) x scalar(@_); } else { %is_if_brace_follower = (); } # nothing can follow the closing curly of an else { } block: %is_else_brace_follower = (); # what can follow a multi-line anonymous sub definition closing curly: @_ = qw# ; : => or and && || ~~ !~~ ) #; push @_, ','; @is_anon_sub_brace_follower{@_} = (1) x scalar(@_); # what can follow a one-line anonynomous sub closing curly: # one-line anonumous subs also have ']' here... # see tk3.t and PP.pm @_ = qw# ; : => or and && || ) ] ~~ !~~ #; push @_, ','; @is_anon_sub_1_brace_follower{@_} = (1) x scalar(@_); # What can follow a closing curly of a block # which is not an if/elsif/else/do/sort/map/grep/eval/sub # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl' @_ = qw# ; : => or and && || ) #; push @_, ','; # allow cuddled continue if cuddled else is specified if ( $rOpts->{'cuddled-else'} ) { push @_, 'continue'; } @is_other_brace_follower{@_} = (1) x scalar(@_); $right_bond_strength{'{'} = WEAK; $left_bond_strength{'{'} = VERY_STRONG; # make -l=0 equal to -l=infinite if ( !$rOpts->{'maximum-line-length'} ) { $rOpts->{'maximum-line-length'} = 1000000; } # make -lbl=0 equal to -lbl=infinite if ( !$rOpts->{'long-block-line-count'} ) { $rOpts->{'long-block-line-count'} = 1000000; } my $ole = $rOpts->{'output-line-ending'}; if ($ole) { my %endings = ( dos => "\015\012", win => "\015\012", mac => "\015", unix => "\012", ); $ole = lc $ole; unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) { my $str = join " ", keys %endings; die <{'preserve-line-endings'} ) { warn "Ignoring -ple; conflicts with -ole\n"; $rOpts->{'preserve-line-endings'} = undef; } } # hashes used to simplify setting whitespace %tightness = ( '{' => $rOpts->{'brace-tightness'}, '}' => $rOpts->{'brace-tightness'}, '(' => $rOpts->{'paren-tightness'}, ')' => 
$rOpts->{'paren-tightness'}, '[' => $rOpts->{'square-bracket-tightness'}, ']' => $rOpts->{'square-bracket-tightness'}, ); %matching_token = ( '{' => '}', '(' => ')', '[' => ']', '?' => ':', ); # frequently used parameters $rOpts_add_newlines = $rOpts->{'add-newlines'}; $rOpts_add_whitespace = $rOpts->{'add-whitespace'}; $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'}; $rOpts_block_brace_vertical_tightness = $rOpts->{'block-brace-vertical-tightness'}; $rOpts_brace_left_and_indent = $rOpts->{'brace-left-and-indent'}; $rOpts_comma_arrow_breakpoints = $rOpts->{'comma-arrow-breakpoints'}; $rOpts_break_at_old_ternary_breakpoints = $rOpts->{'break-at-old-ternary-breakpoints'}; $rOpts_break_at_old_comma_breakpoints = $rOpts->{'break-at-old-comma-breakpoints'}; $rOpts_break_at_old_keyword_breakpoints = $rOpts->{'break-at-old-keyword-breakpoints'}; $rOpts_break_at_old_logical_breakpoints = $rOpts->{'break-at-old-logical-breakpoints'}; $rOpts_closing_side_comment_else_flag = $rOpts->{'closing-side-comment-else-flag'}; $rOpts_closing_side_comment_maximum_text = $rOpts->{'closing-side-comment-maximum-text'}; $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'}; $rOpts_cuddled_else = $rOpts->{'cuddled-else'}; $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'}; $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'}; $rOpts_indent_columns = $rOpts->{'indent-columns'}; $rOpts_line_up_parentheses = $rOpts->{'line-up-parentheses'}; $rOpts_maximum_fields_per_table = $rOpts->{'maximum-fields-per-table'}; $rOpts_maximum_line_length = $rOpts->{'maximum-line-length'}; $rOpts_short_concatenation_item_length = $rOpts->{'short-concatenation-item-length'}; $rOpts_keep_old_blank_lines = $rOpts->{'keep-old-blank-lines'}; $rOpts_ignore_old_breakpoints = $rOpts->{'ignore-old-breakpoints'}; $rOpts_format_skipping = $rOpts->{'format-skipping'}; $rOpts_space_function_paren = $rOpts->{'space-function-paren'}; $rOpts_space_keyword_paren = 
$rOpts->{'space-keyword-paren'}; $rOpts_keep_interior_semicolons = $rOpts->{'keep-interior-semicolons'}; $half_maximum_line_length = $rOpts_maximum_line_length / 2; # Note that both opening and closing tokens can access the opening # and closing flags of their container types. %opening_vertical_tightness = ( '(' => $rOpts->{'paren-vertical-tightness'}, '{' => $rOpts->{'brace-vertical-tightness'}, '[' => $rOpts->{'square-bracket-vertical-tightness'}, ')' => $rOpts->{'paren-vertical-tightness'}, '}' => $rOpts->{'brace-vertical-tightness'}, ']' => $rOpts->{'square-bracket-vertical-tightness'}, ); %closing_vertical_tightness = ( '(' => $rOpts->{'paren-vertical-tightness-closing'}, '{' => $rOpts->{'brace-vertical-tightness-closing'}, '[' => $rOpts->{'square-bracket-vertical-tightness-closing'}, ')' => $rOpts->{'paren-vertical-tightness-closing'}, '}' => $rOpts->{'brace-vertical-tightness-closing'}, ']' => $rOpts->{'square-bracket-vertical-tightness-closing'}, ); # assume flag for '>' same as ')' for closing qw quotes %closing_token_indentation = ( ')' => $rOpts->{'closing-paren-indentation'}, '}' => $rOpts->{'closing-brace-indentation'}, ']' => $rOpts->{'closing-square-bracket-indentation'}, '>' => $rOpts->{'closing-paren-indentation'}, ); %opening_token_right = ( '(' => $rOpts->{'opening-paren-right'}, '{' => $rOpts->{'opening-hash-brace-right'}, '[' => $rOpts->{'opening-square-bracket-right'}, ); %stack_opening_token = ( '(' => $rOpts->{'stack-opening-paren'}, '{' => $rOpts->{'stack-opening-hash-brace'}, '[' => $rOpts->{'stack-opening-square-bracket'}, ); %stack_closing_token = ( ')' => $rOpts->{'stack-closing-paren'}, '}' => $rOpts->{'stack-closing-hash-brace'}, ']' => $rOpts->{'stack-closing-square-bracket'}, ); } sub make_static_block_comment_pattern { # create the pattern used to identify static block comments $static_block_comment_pattern = '^\s*##'; # allow the user to change it if ( $rOpts->{'static-block-comment-prefix'} ) { my $prefix = 
$rOpts->{'static-block-comment-prefix'}; $prefix =~ s/^\s*//; my $pattern = $prefix; # user may give leading caret to force matching left comments only if ( $prefix !~ /^\^#/ ) { if ( $prefix !~ /^#/ ) { die "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"; } $pattern = '^\s*' . $prefix; } eval "'##'=~/$pattern/"; if ($@) { die "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"; } $static_block_comment_pattern = $pattern; } }

sub make_format_skipping_pattern {

    # Build and validate the regex text that recognizes a format-skipping
    # marker comment.
    #
    #   $opt_name - key in $rOpts which may hold a user-supplied marker
    #   $default  - marker used when that option is unset or false
    #
    # Returns the pattern text '^' . marker . '\s'.
    # Dies if the marker does not begin with '#' or if the resulting
    # pattern is not a valid regex.
    my ( $opt_name, $default ) = @_;

    my $param = $rOpts->{$opt_name};
    unless ($param) { $param = $default }
    $param =~ s/^\s*//;
    if ( $param !~ /^#/ ) {
        die "ERROR: the $opt_name parameter '$param' must begin with '#'\n";
    }
    my $pattern = '^' . $param . '\s';

    # Trial-compile the pattern by interpolating it at run time inside a
    # block eval.  A string eval must not be used here: the option text
    # would be compiled as Perl source, so a '/' in it would end the match
    # operator early and the remainder would be executed as code.
    eval { '#' =~ /$pattern/ };
    if ($@) {
        die
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n";
    }
    return $pattern;
}

sub make_closing_side_comment_list_pattern {

    # Set $closing_side_comment_list_pattern, the regex text used for
    # recognizing selected block types.  The default is '^\w+'; a
    # non-empty 'closing-side-comment-list' option replaces it with the
    # pattern built by make_block_pattern.

    $closing_side_comment_list_pattern = '^\w+';

    my $list = $rOpts->{'closing-side-comment-list'};
    return unless ($list);

    $closing_side_comment_list_pattern = make_block_pattern( '-cscl', $list );
    return;
}

sub make_bli_pattern {

    # Set $bli_pattern from the list of block types in $bli_list_string.
    # A non-empty 'brace-left-and-indent-list' option first replaces
    # $bli_list_string; otherwise its current value is used as is.

    my $list = $rOpts->{'brace-left-and-indent-list'};
    $bli_list_string = $list if ($list);

    $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
}

sub make_block_brace_vertical_tightness_pattern {

    # Set $block_brace_vertical_tightness_pattern, the regex text used for
    # recognizing selected block types.  The default covers the common
    # block keywords, labels and subs; a non-empty
    # 'block-brace-vertical-tightness-list' option replaces it with the
    # pattern built by make_block_pattern.

    $block_brace_vertical_tightness_pattern =
      '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';

    my $list = $rOpts->{'block-brace-vertical-tightness-list'};
    return unless ($list);

    $block_brace_vertical_tightness_pattern =
      make_block_pattern( '-bbvtl', $list );
    return;
}
sub make_block_pattern {

    # given a string of block-type keywords, return a regex to match them
    # The only tricky part is that labels are indicated with a single ':'
    # and the 'sub' token text may have additional text after it (name of
    # sub).
    #
    # Example:
    #
    #   input string: "if else elsif unless while for foreach do : sub";
    #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
    #
    #   $abbrev - option abbreviation, used only in the warning message
    #   $string - list of block types, split into words by split_words
    #
    # Duplicate words are used once.  A word which is not 'sub', ':' or
    # something starting with a word character is skipped with a warning.
    my ( $abbrev, $string ) = @_;

    my @words = ();
    my %seen;
    foreach my $word ( split_words($string) ) {
        next if $seen{$word};
        $seen{$word} = 1;

        # 'sub' is not anchored with '$' like the other words, so it is
        # added as a separate alternative after the loop
        next if ( $word eq 'sub' );

        if ( $word eq ':' ) {
            push @words, '\w+:';
        }
        elsif ( $word =~ /^\w/ ) {
            push @words, $word;
        }
        else {
            warn "unrecognized block type $word after $abbrev, ignoring\n";
        }
    }

    my $pattern = '(' . join( '|', @words ) . ')$';
    if ( $seen{'sub'} ) {
        $pattern = '(' . $pattern . '|sub)';
    }
    return '^' . $pattern;
}

sub make_static_side_comment_pattern {

    # create the pattern used to identify static side comments
    # The default is '^##'.  A true 'static-side-comment-prefix' option
    # replaces it with '^' followed by that prefix (leading whitespace
    # removed).  Dies if the result is not a valid regex.
    $static_side_comment_pattern = '^##';

    # allow the user to change it
    if ( $rOpts->{'static-side-comment-prefix'} ) {
        my $prefix = $rOpts->{'static-side-comment-prefix'};
        $prefix =~ s/^\s*//;
        my $pattern = '^' . $prefix;

        # Trial-compile with a block eval and run-time interpolation; a
        # string eval would compile the option text as Perl source.
        eval { '##' =~ /$pattern/ };
        if ($@) {
            die
"ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n";
        }
        $static_side_comment_pattern = $pattern;
    }
}

sub make_closing_side_comment_prefix {

    # Be sure we have a valid closing side comment prefix
    #
    # Stores the prefix which will be used back into
    # $rOpts->{'closing-side-comment-prefix'} and the regex text which
    # recognizes it into $closing_side_comment_prefix_pattern.
    #
    # With no user prefix, or with a user prefix that cannot be turned
    # into a valid regex, the defaults '## end' and '^##\s+end' are used.

    # start from the defaults; they are replaced only by a prefix which
    # passes the check below
    my $csc_prefix         = '## end';
    my $csc_prefix_pattern = '^##\s+end';

    my $user_prefix = $rOpts->{'closing-side-comment-prefix'};
    if ( defined($user_prefix) ) {

        # the prefix must begin with '#'
        my $test_csc_prefix = $user_prefix;
        if ( $test_csc_prefix !~ /^#/ ) {
            $test_csc_prefix = '#' . $test_csc_prefix;
        }

        # make a regex to recognize the prefix
        my $test_csc_prefix_pattern = $test_csc_prefix;

        # escape any special characters
        $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;

        $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;

        # allow exact number of intermediate spaces to vary
        $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;

        # make sure we have a good pattern
        # if we fail this we probably have an error in escaping
        # characters.
        eval { '##' =~ /$test_csc_prefix_pattern/ };
        if ($@) {

            # shouldn't happen..must have screwed up escaping, above
            report_definite_bug();

            # report the pattern which actually failed
            warn
"Program Error: the -cscp prefix '$user_prefix' caused the invalid regex '$test_csc_prefix_pattern'\n";

            # just warn and keep going with defaults
            warn "Please consider using a simpler -cscp prefix\n";
            warn "Using default -cscp instead; please check output\n";
        }
        else {
            $csc_prefix         = $test_csc_prefix;
            $csc_prefix_pattern = $test_csc_prefix_pattern;
        }
    }
    $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
    $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
}

sub dump_want_left_space { my $fh = shift; local $" = "\n"; print $fh <1; # $a = $b - III; # and even this: # $a = - III; || ( ( $tokenl eq '-' ) && ( $typer =~ /^[wC]$/ && $tokenr =~ /^[_A-Za-z]/ ) ) # '= -' should not become =- or you will get a warning # about reversed -= # || ($tokenr eq '-') # keep a space between a quote and a bareword to prevent the # bareword from becomming a quote modifier. || ( ( $typel eq 'Q' ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) # keep a space between a token ending in '$' and any word; # this caused trouble: "die @$ if $@" || ( ( $typel eq 'i' && $tokenl =~ /\$$/ ) && ( $tokenr =~ /^[a-zA-Z_]/ ) ) # perl is very fussy about spaces before << || ( $tokenr =~ /^\<\' is excluded because it never gets space # parentheses and brackets are excluded since they are handled specially # curly braces are included but may be overridden by logic, such as # newline logic. # NEW_TOKENS: create a whitespace rule here. This can be as # simple as adding your new letter to @spaces_both_sides, for # example. 
@_ = qw" L { ( [ "; @is_opening_type{@_} = (1) x scalar(@_); @_ = qw" R } ) ] "; @is_closing_type{@_} = (1) x scalar(@_); my @spaces_both_sides = qw" + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -= .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~ &&= ||= //= <=> A k f w F n C Y U G v "; my @spaces_left_side = qw" t ! ~ m p { \ h pp mm Z j "; push( @spaces_left_side, '#' ); # avoids warning message my @spaces_right_side = qw" ; } ) ] R J ++ -- **= "; push( @spaces_right_side, ',' ); # avoids warning message @want_left_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); @want_right_space{@spaces_both_sides} = (1) x scalar(@spaces_both_sides); @want_left_space{@spaces_left_side} = (1) x scalar(@spaces_left_side); @want_right_space{@spaces_left_side} = (-1) x scalar(@spaces_left_side); @want_left_space{@spaces_right_side} = (-1) x scalar(@spaces_right_side); @want_right_space{@spaces_right_side} = (1) x scalar(@spaces_right_side); $want_left_space{'L'} = WS_NO; $want_left_space{'->'} = WS_NO; $want_right_space{'->'} = WS_NO; $want_left_space{'**'} = WS_NO; $want_right_space{'**'} = WS_NO; # hash type information must stay tightly bound # as in : ${xxxx} $binary_ws_rules{'i'}{'L'} = WS_NO; $binary_ws_rules{'i'}{'{'} = WS_YES; $binary_ws_rules{'k'}{'{'} = WS_YES; $binary_ws_rules{'U'}{'{'} = WS_YES; $binary_ws_rules{'i'}{'['} = WS_NO; $binary_ws_rules{'R'}{'L'} = WS_NO; $binary_ws_rules{'R'}{'{'} = WS_NO; $binary_ws_rules{'t'}{'L'} = WS_NO; $binary_ws_rules{'t'}{'{'} = WS_NO; $binary_ws_rules{'}'}{'L'} = WS_NO; $binary_ws_rules{'}'}{'{'} = WS_NO; $binary_ws_rules{'$'}{'L'} = WS_NO; $binary_ws_rules{'$'}{'{'} = WS_NO; $binary_ws_rules{'@'}{'L'} = WS_NO; $binary_ws_rules{'@'}{'{'} = WS_NO; $binary_ws_rules{'='}{'L'} = WS_YES; # the following includes ') {' # as in : if ( xxx ) { yyy } $binary_ws_rules{']'}{'L'} = WS_NO; $binary_ws_rules{']'}{'{'} = WS_NO; $binary_ws_rules{')'}{'{'} = WS_YES; $binary_ws_rules{')'}{'['} = WS_NO; 
$binary_ws_rules{']'}{'['} = WS_NO; $binary_ws_rules{']'}{'{'} = WS_NO; $binary_ws_rules{'}'}{'['} = WS_NO; $binary_ws_rules{'R'}{'['} = WS_NO; $binary_ws_rules{']'}{'++'} = WS_NO; $binary_ws_rules{']'}{'--'} = WS_NO; $binary_ws_rules{')'}{'++'} = WS_NO; $binary_ws_rules{')'}{'--'} = WS_NO; $binary_ws_rules{'R'}{'++'} = WS_NO; $binary_ws_rules{'R'}{'--'} = WS_NO; ######################################################## # should no longer be necessary (see niek.pl) ##$binary_ws_rules{'k'}{':'} = WS_NO; # keep colon with label ##$binary_ws_rules{'w'}{':'} = WS_NO; ######################################################## $binary_ws_rules{'i'}{'Q'} = WS_YES; $binary_ws_rules{'n'}{'('} = WS_YES; # occurs in 'use package n ()' # FIXME: we need to split 'i' into variables and functions # and have no space for functions but space for variables. For now, # I have a special patch in the special rules below $binary_ws_rules{'i'}{'('} = WS_NO; $binary_ws_rules{'w'}{'('} = WS_NO; $binary_ws_rules{'w'}{'{'} = WS_YES; } my ( $jmax, $rtokens, $rtoken_type, $rblock_type ) = @_; my ( $last_token, $last_type, $last_block_type, $token, $type, $block_type ); my (@white_space_flag); my $j_tight_closing_paren = -1; if ( $max_index_to_go >= 0 ) { $token = $tokens_to_go[$max_index_to_go]; $type = $types_to_go[$max_index_to_go]; $block_type = $block_type_to_go[$max_index_to_go]; } else { $token = ' '; $type = 'b'; $block_type = ''; } # loop over all tokens my ( $j, $ws ); for ( $j = 0 ; $j <= $jmax ; $j++ ) { if ( $$rtoken_type[$j] eq 'b' ) { $white_space_flag[$j] = WS_OPTIONAL; next; } # set a default value, to be changed as needed $ws = undef; $last_token = $token; $last_type = $type; $last_block_type = $block_type; $token = $$rtokens[$j]; $type = $$rtoken_type[$j]; $block_type = $$rblock_type[$j]; #--------------------------------------------------------------- # section 1: # handle space on the inside of opening braces #--------------------------------------------------------------- # 
/^[L\{\(\[]$/ if ( $is_opening_type{$last_type} ) { $j_tight_closing_paren = -1; # let's keep empty matched braces together: () {} [] # except for BLOCKS if ( $token eq $matching_token{$last_token} ) { if ($block_type) { $ws = WS_YES; } else { $ws = WS_NO; } } else { # we're considering the right of an opening brace # tightness = 0 means always pad inside with space # tightness = 1 means pad inside if "complex" # tightness = 2 means never pad inside with space my $tightness; if ( $last_type eq '{' && $last_token eq '{' && $last_block_type ) { $tightness = $rOpts_block_brace_tightness; } else { $tightness = $tightness{$last_token} } if ( $tightness <= 0 ) { $ws = WS_YES; } elsif ( $tightness > 1 ) { $ws = WS_NO; } else { # Patch to count '-foo' as single token so that # each of $a{-foo} and $a{foo} and $a{'foo'} do # not get spaces with default formatting. my $j_here = $j; ++$j_here if ( $token eq '-' && $last_token eq '{' && $$rtoken_type[ $j + 1 ] eq 'w' ); # $j_next is where a closing token should be if # the container has a single token my $j_next = ( $$rtoken_type[ $j_here + 1 ] eq 'b' ) ? 
$j_here + 2 : $j_here + 1; my $tok_next = $$rtokens[$j_next]; my $type_next = $$rtoken_type[$j_next]; # for tightness = 1, if there is just one token # within the matching pair, we will keep it tight if ( $tok_next eq $matching_token{$last_token} # but watch out for this: [ [ ] (misc.t) && $last_token ne $token ) { # remember where to put the space for the closing paren $j_tight_closing_paren = $j_next; $ws = WS_NO; } else { $ws = WS_YES; } } } } # done with opening braces and brackets my $ws_1 = $ws if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- # section 2: # handle space on inside of closing brace pairs #--------------------------------------------------------------- # /[\}\)\]R]/ if ( $is_closing_type{$type} ) { if ( $j == $j_tight_closing_paren ) { $j_tight_closing_paren = -1; $ws = WS_NO; } else { if ( !defined($ws) ) { my $tightness; if ( $type eq '}' && $token eq '}' && $block_type ) { $tightness = $rOpts_block_brace_tightness; } else { $tightness = $tightness{$token} } $ws = ( $tightness > 1 ) ? WS_NO : WS_YES; } } } my $ws_2 = $ws if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- # section 3: # use the binary table #--------------------------------------------------------------- if ( !defined($ws) ) { $ws = $binary_ws_rules{$last_type}{$type}; } my $ws_3 = $ws if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- # section 4: # some special cases #--------------------------------------------------------------- if ( $token eq '(' ) { # This will have to be tweaked as tokenization changes. # We usually want a space at '} (', for example: # map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s ); # # But not others: # &{ $_->[1] }( delete $_[$#_]{ $_->[0] } ); # At present, the above & block is marked as type L/R so this case # won't go through here. 
if ( $last_type eq '}' ) { $ws = WS_YES } # NOTE: some older versions of Perl had occasional problems if # spaces are introduced between keywords or functions and opening # parens. So the default is not to do this except is certain # cases. The current Perl seems to tolerate spaces. # Space between keyword and '(' elsif ( $last_type eq 'k' ) { $ws = WS_NO unless ( $rOpts_space_keyword_paren || $space_after_keyword{$last_token} ); } # Space between function and '(' # ----------------------------------------------------- # 'w' and 'i' checks for something like: # myfun( &myfun( ->myfun( # ----------------------------------------------------- elsif (( $last_type =~ /^[wU]$/ ) || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) ) { $ws = WS_NO unless ($rOpts_space_function_paren); } # space between something like $i and ( in # for $i ( 0 .. 20 ) { # FIXME: eventually, type 'i' needs to be split into multiple # token types so this can be a hardwired rule. elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) { $ws = WS_YES; } # allow constant function followed by '()' to retain no space elsif ( $last_type eq 'C' && $$rtokens[ $j + 1 ] eq ')' ) { $ws = WS_NO; } } # patch for SWITCH/CASE: make space at ']{' optional # since the '{' might begin a case or when block elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) { $ws = WS_OPTIONAL; } # keep space between 'sub' and '{' for anonymous sub definition if ( $type eq '{' ) { if ( $last_token eq 'sub' ) { $ws = WS_YES; } # this is needed to avoid no space in '){' if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES } # avoid any space before the brace or bracket in something like # @opts{'a','b',...} if ( $last_type eq 'i' && $last_token =~ /^\@/ ) { $ws = WS_NO; } } elsif ( $type eq 'i' ) { # never a space before -> if ( $token =~ /^\-\>/ ) { $ws = WS_NO; } } # retain any space between '-' and bare word elsif ( $type eq 'w' || $type eq 'C' ) { $ws = WS_OPTIONAL if $last_type eq '-'; # never a 
space before -> if ( $token =~ /^\-\>/ ) { $ws = WS_NO; } } # retain any space between '-' and bare word # example: avoid space between 'USER' and '-' here: # $myhash{USER-NAME}='steve'; elsif ( $type eq 'm' || $type eq '-' ) { $ws = WS_OPTIONAL if ( $last_type eq 'w' ); } # always space before side comment elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 } # always preserver whatever space was used after a possible # filehandle (except _) or here doc operator if ( $type ne '#' && ( ( $last_type eq 'Z' && $last_token ne '_' ) || $last_type eq 'h' ) ) { $ws = WS_OPTIONAL; } my $ws_4 = $ws if FORMATTER_DEBUG_FLAG_WHITE; #--------------------------------------------------------------- # section 5: # default rules not covered above #--------------------------------------------------------------- # if we fall through to here, # look at the pre-defined hash tables for the two tokens, and # if (they are equal) use the common value # if (either is zero or undef) use the other # if (either is -1) use it # That is, # left vs right # 1 vs 1 --> 1 # 0 vs 0 --> 0 # -1 vs -1 --> -1 # # 0 vs -1 --> -1 # 0 vs 1 --> 1 # 1 vs 0 --> 1 # -1 vs 0 --> -1 # # -1 vs 1 --> -1 # 1 vs -1 --> -1 if ( !defined($ws) ) { my $wl = $want_left_space{$type}; my $wr = $want_right_space{$last_type}; if ( !defined($wl) ) { $wl = 0 } if ( !defined($wr) ) { $wr = 0 } $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr; } if ( !defined($ws) ) { $ws = 0; write_diagnostics( "WS flag is undefined for tokens $last_token $token\n"); } # Treat newline as a whitespace. Otherwise, we might combine # 'Send' and '-recipients' here according to the above rules: # my $msg = new Fax::Send # -recipients => $to, # -data => $data; if ( $ws == 0 && $j == 0 ) { $ws = 1 } if ( ( $ws == 0 ) && $j > 0 && $j < $jmax && ( $last_type !~ /^[Zh]$/ ) ) { # If this happens, we have a non-fatal but undesirable # hole in the above rules which should be patched. 
write_diagnostics( "WS flag is zero for tokens $last_token $token\n");
        }

        $white_space_flag[$j] = $ws;

        FORMATTER_DEBUG_FLAG_WHITE && do {
            my $str = substr( $last_token, 0, 15 );
            $str .= ' ' x ( 16 - length($str) );
            if ( !defined($ws_1) ) { $ws_1 = "*" }
            if ( !defined($ws_2) ) { $ws_2 = "*" }
            if ( !defined($ws_3) ) { $ws_3 = "*" }
            if ( !defined($ws_4) ) { $ws_4 = "*" }
            print
"WHITE: i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
        };
    }
    return \@white_space_flag;
}

{    # begin print_line_of_tokens

    # Data for the input line being processed.  The values are assigned
    # in print_line_of_tokens; extract_token reads the array references.
    my (
        $rtoken_type,            $rtokens,
        $rlevels,                $rslevels,
        $rblock_type,            $rcontainer_type,
        $rcontainer_environment, $rtype_sequence,
        $input_line,             $rnesting_tokens,
        $rci_levels,             $rnesting_blocks,
        $in_quote,               $python_indentation_level,
    );

    # These local token variables are stored by store_token_to_go:
    my (
        $block_type,            $ci_level,
        $container_environment, $container_type,
        $in_continued_quote,    $level,
        $nesting_blocks,        $no_internal_newlines,
        $slevel,                $token,
        $type,                  $type_sequence,
    );

    # routine to pull the jth token from the line of tokens
    sub extract_token {

        # Copy entry $j of each per-line array into the corresponding
        # local token variable.
        my ($j) = @_;
        $token                 = $rtokens->[$j];
        $type                  = $rtoken_type->[$j];
        $block_type            = $rblock_type->[$j];
        $container_type        = $rcontainer_type->[$j];
        $container_environment = $rcontainer_environment->[$j];
        $type_sequence         = $rtype_sequence->[$j];
        $level                 = $rlevels->[$j];
        $slevel                = $rslevels->[$j];
        $nesting_blocks        = $rnesting_blocks->[$j];
        $ci_level              = $rci_levels->[$j];
    }

    {

        # Snapshot of the local token variables.  It is written by
        # save_current_token and read back by restore_current_token; the
        # two lists must name the same variables in the same order.
        my @saved_token;

        sub save_current_token {

            @saved_token = (
                $block_type,            $ci_level,
                $container_environment, $container_type,
                $in_continued_quote,    $level,
                $nesting_blocks,        $no_internal_newlines,
                $slevel,                $token,
                $type,                  $type_sequence,
            );
        }

        sub restore_current_token {
            (
                $block_type,            $ci_level,
                $container_environment, $container_type,
                $in_continued_quote,    $level,
                $nesting_blocks,        $no_internal_newlines,
                $slevel,                $token,
                $type,                  $type_sequence,
            ) = @saved_token;
        }
    }

    # Routine to place the current token into the output stream.
    # Called once per output token.
    sub store_token_to_go {

        # A true first argument forces the no-break flag on for this
        # token; otherwise the current $no_internal_newlines is used.
        my $flag = $_[0] ? 1 : $no_internal_newlines;

        # advance to the next slot of the output batch and fill it
        $max_index_to_go++;
        $tokens_to_go[$max_index_to_go]                = $token;
        $types_to_go[$max_index_to_go]                 = $type;
        $nobreak_to_go[$max_index_to_go]               = $flag;
        $old_breakpoint_to_go[$max_index_to_go]        = 0;
        $forced_breakpoint_to_go[$max_index_to_go]     = 0;
        $block_type_to_go[$max_index_to_go]            = $block_type;
        $type_sequence_to_go[$max_index_to_go]         = $type_sequence;
        $container_environment_to_go[$max_index_to_go] = $container_environment;
        $nesting_blocks_to_go[$max_index_to_go]        = $nesting_blocks;
        $ci_levels_to_go[$max_index_to_go]             = $ci_level;
        $mate_index_to_go[$max_index_to_go]            = -1;
        $matching_token_to_go[$max_index_to_go]        = '';
        $bond_strength_to_go[$max_index_to_go]         = 0;

        # Note: negative levels are currently retained as a diagnostic so that
        # the 'final indentation level' is correctly reported for bad scripts.
        # But this means that every use of $level as an index must be checked.
        # If this becomes too much of a problem, we might give up and just clip
        # them at zero.
        ## $levels_to_go[$max_index_to_go] = ( $level > 0 ) ? $level : 0;
        $levels_to_go[$max_index_to_go] = $level;

        # the nesting depth, unlike the level, is clipped at zero
        $nesting_depth_to_go[$max_index_to_go] = ( $slevel >= 0 ) ? $slevel : 0;

        # running total of token lengths; entry i+1 includes token i
        $lengths_to_go[ $max_index_to_go + 1 ] =
          $lengths_to_go[$max_index_to_go] + length($token);

        # Define the indentation that this token would have if it started
        # a new line.  We have to do this now because we need to know this
        # when considering one-line blocks.
        set_leading_whitespace( $level, $ci_level, $in_continued_quote );

        # keep track of the last two nonblank tokens in the batch
        if ( $type ne 'b' ) {
            $last_last_nonblank_index_to_go = $last_nonblank_index_to_go;
            $last_last_nonblank_type_to_go  = $last_nonblank_type_to_go;
            $last_last_nonblank_token_to_go = $last_nonblank_token_to_go;
            $last_nonblank_index_to_go      = $max_index_to_go;
            $last_nonblank_type_to_go       = $type;
            $last_nonblank_token_to_go      = $token;
            if ( $type eq ',' ) {
                $comma_count_in_batch++;
            }
        }

        FORMATTER_DEBUG_FLAG_STORE && do {
            my ( $a, $b, $c ) = caller();
            print
"STORE: from $a $c: storing token $token type $type lev=$level slev=$slevel at $max_index_to_go\n";
        };
    }

    sub insert_new_token_to_go {

        # insert a new token into the output stream.  use same level as
        # previous token; assumes a character at max_index_to_go.
        #
        # Arguments: ( $token, $type, $slevel, $no_internal_newlines )
        #
        # The caller's current token variables are saved on entry and
        # restored before returning.
        save_current_token();
        ( $token, $type, $slevel, $no_internal_newlines ) = @_;

        if ( $max_index_to_go == UNDEFINED_INDEX ) {
            warning("code bug: bad call to insert_new_token_to_go\n");
        }

        # the remaining values are copied from the last stored token
        $level = $levels_to_go[$max_index_to_go];

        # FIXME: it seems to be necessary to use the next, rather than
        # previous, value of this variable when creating a new blank (align.t)
        #my $slevel = $nesting_depth_to_go[$max_index_to_go];
        $nesting_blocks        = $nesting_blocks_to_go[$max_index_to_go];
        $ci_level              = $ci_levels_to_go[$max_index_to_go];
        $container_environment = $container_environment_to_go[$max_index_to_go];
        $in_continued_quote    = 0;
        $block_type            = "";
        $type_sequence         = "";

        store_token_to_go();
        restore_current_token();
        return;
    }

    sub print_line_of_tokens {

        my $line_of_tokens = shift;

        # This routine is called once per input line to process all of
        # the tokens on that line. This is the first stage of
        # beautification.
        #
        # Full-line comments and blank lines may be processed immediately.
        #
        # For normal lines of code, the tokens are stored one-by-one,
        # via calls to 'sub store_token_to_go', until a known line break
        # point is reached. Then, the batch of collected tokens is
        # passed along to 'sub output_line_to_go' for further
        # processing.
This routine decides if there should be # whitespace between each pair of non-white tokens, so later # routines only need to decide on any additional line breaks. # Any whitespace is initally a single space character. Later, # the vertical aligner may expand that to be multiple space # characters if necessary for alignment. # extract input line number for error messages $input_line_number = $line_of_tokens->{_line_number}; $rtoken_type = $line_of_tokens->{_rtoken_type}; $rtokens = $line_of_tokens->{_rtokens}; $rlevels = $line_of_tokens->{_rlevels}; $rslevels = $line_of_tokens->{_rslevels}; $rblock_type = $line_of_tokens->{_rblock_type}; $rcontainer_type = $line_of_tokens->{_rcontainer_type}; $rcontainer_environment = $line_of_tokens->{_rcontainer_environment}; $rtype_sequence = $line_of_tokens->{_rtype_sequence}; $input_line = $line_of_tokens->{_line_text}; $rnesting_tokens = $line_of_tokens->{_rnesting_tokens}; $rci_levels = $line_of_tokens->{_rci_levels}; $rnesting_blocks = $line_of_tokens->{_rnesting_blocks}; $in_continued_quote = $starting_in_quote = $line_of_tokens->{_starting_in_quote}; $in_quote = $line_of_tokens->{_ending_in_quote}; $ending_in_quote = $in_quote; $python_indentation_level = $line_of_tokens->{_python_indentation_level}; my $j; my $j_next; my $jmax; my $next_nonblank_token; my $next_nonblank_token_type; my $rwhite_space_flag; $jmax = @$rtokens - 1; $block_type = ""; $container_type = ""; $container_environment = ""; $type_sequence = ""; $no_internal_newlines = 1 - $rOpts_add_newlines; $is_static_block_comment = 0; # Handle a continued quote.. if ($in_continued_quote) { # A line which is entirely a quote or pattern must go out # verbatim. Note: the \n is contained in $input_line. 
if ( $jmax <= 0 ) { if ( ( $input_line =~ "\t" ) ) { note_embedded_tab(); } write_unindented_line("$input_line"); $last_line_had_side_comment = 0; return; } # prior to version 20010406, perltidy had a bug which placed # continuation indentation before the last line of some multiline # quotes and patterns -- exactly the lines passing this way. # To help find affected lines in scripts run with these # versions, run with '-chk', and it will warn of any quotes or # patterns which might have been modified by these early # versions. if ( $rOpts->{'check-multiline-quotes'} && $input_line =~ /^ / ) { warning( "-chk: please check this line for extra leading whitespace\n" ); } } # Write line verbatim if we are in a formatting skip section if ($in_format_skipping_section) { write_unindented_line("$input_line"); $last_line_had_side_comment = 0; # Note: extra space appended to comment simplifies pattern matching if ( $jmax == 0 && $$rtoken_type[0] eq '#' && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_end/o ) { $in_format_skipping_section = 0; write_logfile_entry("Exiting formatting skip section\n"); } return; } # See if we are entering a formatting skip section if ( $rOpts_format_skipping && $jmax == 0 && $$rtoken_type[0] eq '#' && ( $$rtokens[0] . " " ) =~ /$format_skipping_pattern_begin/o ) { flush(); $in_format_skipping_section = 1; write_logfile_entry("Entering formatting skip section\n"); write_unindented_line("$input_line"); $last_line_had_side_comment = 0; return; } # delete trailing blank tokens if ( $jmax > 0 && $$rtoken_type[$jmax] eq 'b' ) { $jmax-- } # Handle a blank line.. if ( $jmax < 0 ) { # If keep-old-blank-lines is zero, we delete all # old blank lines and let the blank line rules generate any # needed blanks. 
if ($rOpts_keep_old_blank_lines) { flush(); $file_writer_object->write_blank_code_line( $rOpts_keep_old_blank_lines == 2 ); $last_line_leading_type = 'b'; } $last_line_had_side_comment = 0; return; } # see if this is a static block comment (starts with ## by default) my $is_static_block_comment_without_leading_space = 0; if ( $jmax == 0 && $$rtoken_type[0] eq '#' && $rOpts->{'static-block-comments'} && $input_line =~ /$static_block_comment_pattern/o ) { $is_static_block_comment = 1; $is_static_block_comment_without_leading_space = substr( $input_line, 0, 1 ) eq '#'; } # Check for comments which are line directives # Treat exactly as static block comments without leading space # reference: perlsyn, near end, section Plain Old Comments (Not!) # example: '# line 42 "new_filename.plx"' if ( $jmax == 0 && $$rtoken_type[0] eq '#' && $input_line =~ /^\# \s* line \s+ (\d+) \s* (?:\s("?)([^"]+)\2)? \s* $/x ) { $is_static_block_comment = 1; $is_static_block_comment_without_leading_space = 1; } # create a hanging side comment if appropriate if ( $jmax == 0 && $$rtoken_type[0] eq '#' # only token is a comment && $last_line_had_side_comment # last line had side comment && $input_line =~ /^\s/ # there is some leading space && !$is_static_block_comment # do not make static comment hanging && $rOpts->{'hanging-side-comments'} # user is allowing this ) { # We will insert an empty qw string at the start of the token list # to force this comment to be a side comment. The vertical aligner # should then line it up with the previous side comment. 
unshift @$rtoken_type, 'q'; unshift @$rtokens, ''; unshift @$rlevels, $$rlevels[0]; unshift @$rslevels, $$rslevels[0]; unshift @$rblock_type, ''; unshift @$rcontainer_type, ''; unshift @$rcontainer_environment, ''; unshift @$rtype_sequence, ''; unshift @$rnesting_tokens, $$rnesting_tokens[0]; unshift @$rci_levels, $$rci_levels[0]; unshift @$rnesting_blocks, $$rnesting_blocks[0]; $jmax = 1; } # remember if this line has a side comment $last_line_had_side_comment = ( $jmax > 0 && $$rtoken_type[$jmax] eq '#' ); # Handle a block (full-line) comment.. if ( ( $jmax == 0 ) && ( $$rtoken_type[0] eq '#' ) ) { if ( $rOpts->{'delete-block-comments'} ) { return } if ( $rOpts->{'tee-block-comments'} ) { $file_writer_object->tee_on(); } destroy_one_line_block(); output_line_to_go(); # output a blank line before block comments if ( $last_line_leading_type !~ /^[#b]$/ && $rOpts->{'blanks-before-comments'} # only if allowed && ! $is_static_block_comment # never before static block comments ) { flush(); # switching to new output stream $file_writer_object->write_blank_code_line(); $last_line_leading_type = 'b'; } # TRIM COMMENTS -- This could be turned off as a option $$rtokens[0] =~ s/\s*$//; # trim right end if ( $rOpts->{'indent-block-comments'} && ( !$rOpts->{'indent-spaced-block-comments'} || $input_line =~ /^\s+/ ) && !$is_static_block_comment_without_leading_space ) { extract_token(0); store_token_to_go(); output_line_to_go(); } else { flush(); # switching to new output stream $file_writer_object->write_code_line( $$rtokens[0] . 
"\n" ); $last_line_leading_type = '#'; } if ( $rOpts->{'tee-block-comments'} ) { $file_writer_object->tee_off(); } return; } # compare input/output indentation except for continuation lines # (because they have an unknown amount of initial blank space) # and lines which are quotes (because they may have been outdented) # Note: this test is placed here because we know the continuation flag # at this point, which allows us to avoid non-meaningful checks. my $structural_indentation_level = $$rlevels[0]; compare_indentation_levels( $python_indentation_level, $structural_indentation_level ) unless ( $python_indentation_level < 0 || ( $$rci_levels[0] > 0 ) || ( ( $python_indentation_level == 0 ) && $$rtoken_type[0] eq 'Q' ) ); # Patch needed for MakeMaker. Do not break a statement # in which $VERSION may be calculated. See MakeMaker.pm; # this is based on the coding in it. # The first line of a file that matches this will be eval'd: # /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ # Examples: # *VERSION = \'1.01'; # ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/; # We will pass such a line straight through without breaking # it unless -npvl is used my $is_VERSION_statement = 0; if ( !$saw_VERSION_in_this_file && $input_line =~ /VERSION/ # quick check to reject most lines && $input_line =~ /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ ) { $saw_VERSION_in_this_file = 1; $is_VERSION_statement = 1; write_logfile_entry("passing VERSION line; -npvl deactivates\n"); $no_internal_newlines = 1; } # take care of indentation-only # NOTE: In previous versions we sent all qw lines out immediately here. # No longer doing this: also write a line which is entirely a 'qw' list # to allow stacking of opening and closing tokens. Note that interior # qw lines will still go out at the end of this routine. 
if ( $rOpts->{'indent-only'} ) { flush(); trim($input_line); extract_token(0); $token = $input_line; $type = 'q'; $block_type = ""; $container_type = ""; $container_environment = ""; $type_sequence = ""; store_token_to_go(); output_line_to_go(); return; } push( @$rtokens, ' ', ' ' ); # making $j+2 valid simplifies coding push( @$rtoken_type, 'b', 'b' ); ($rwhite_space_flag) = set_white_space_flag( $jmax, $rtokens, $rtoken_type, $rblock_type ); # find input tabbing to allow checks for tabbing disagreement ## not used for now ##$input_line_tabbing = ""; ##if ( $input_line =~ /^(\s*)/ ) { $input_line_tabbing = $1; } # if the buffer hasn't been flushed, add a leading space if # necessary to keep essential whitespace. This is really only # necessary if we are squeezing out all ws. if ( $max_index_to_go >= 0 ) { $old_line_count_in_batch++; if ( is_essential_whitespace( $last_last_nonblank_token, $last_last_nonblank_type, $tokens_to_go[$max_index_to_go], $types_to_go[$max_index_to_go], $$rtokens[0], $$rtoken_type[0] ) ) { my $slevel = $$rslevels[0]; insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); } } # If we just saw the end of an elsif block, write nag message # if we do not see another elseif or an else. if ($looking_for_else) { unless ( $$rtokens[0] =~ /^(elsif|else)$/ ) { write_logfile_entry("(No else block)\n"); } $looking_for_else = 0; } # This is a good place to kill incomplete one-line blocks if ( ( $semicolons_before_block_self_destruct == 0 ) && ( $max_index_to_go >= 0 ) && ( $types_to_go[$max_index_to_go] eq ';' ) && ( $$rtokens[0] ne '}' ) ) { destroy_one_line_block(); output_line_to_go(); } # loop to process the tokens one-by-one $type = 'b'; $token = ""; foreach $j ( 0 .. 
$jmax ) { # pull out the local values for this token extract_token($j); if ( $type eq '#' ) { # trim trailing whitespace # (there is no option at present to prevent this) $token =~ s/\s*$//; if ( $rOpts->{'delete-side-comments'} # delete closing side comments if necessary || ( $rOpts->{'delete-closing-side-comments'} && $token =~ /$closing_side_comment_prefix_pattern/o && $last_nonblank_block_type =~ /$closing_side_comment_list_pattern/o ) ) { if ( $types_to_go[$max_index_to_go] eq 'b' ) { unstore_token_to_go(); } last; } } # If we are continuing after seeing a right curly brace, flush # buffer unless we see what we are looking for, as in # } else ... if ( $rbrace_follower && $type ne 'b' ) { unless ( $rbrace_follower->{$token} ) { output_line_to_go(); } $rbrace_follower = undef; } $j_next = ( $$rtoken_type[ $j + 1 ] eq 'b' ) ? $j + 2 : $j + 1; $next_nonblank_token = $$rtokens[$j_next]; $next_nonblank_token_type = $$rtoken_type[$j_next]; #-------------------------------------------------------- # Start of section to patch token text #-------------------------------------------------------- # Modify certain tokens here for whitespace # The following is not yet done, but could be: # sub (x x x) if ( $type =~ /^[wit]$/ ) { # Examples: # change '$ var' to '$var' etc # '-> new' to '->new' if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) { $token =~ s/\s*//g; } if ( $token =~ /^sub/ ) { $token =~ s/\s+/ /g } } # change 'LABEL :' to 'LABEL:' elsif ( $type eq 'J' ) { $token =~ s/\s+//g } # patch to add space to something like "x10" # This avoids having to split this token in the pre-tokenizer elsif ( $type eq 'n' ) { if ( $token =~ /^x\d+/ ) { $token =~ s/x/x / } } elsif ( $type eq 'Q' ) { note_embedded_tab() if ( $token =~ "\t" ); # make note of something like '$var = s/xxx/yyy/;' # in case it should have been '$var =~ s/xxx/yyy/;' if ( $token =~ /^(s|tr|y|m|\/)/ && $last_nonblank_token =~ /^(=|==|!=)$/ # precededed by simple scalar && $last_last_nonblank_type eq 'i' && 
$last_last_nonblank_token =~ /^\$/ # followed by some kind of termination # (but give complaint if we can's see far enough ahead) && $next_nonblank_token =~ /^[; \)\}]$/ # scalar is not decleared && !( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(my|our|local)$/ ) ) { my $guess = substr( $last_nonblank_token, 0, 1 ) . '~'; complain( "Note: be sure you want '$last_nonblank_token' instead of '$guess' here\n" ); } } # trim blanks from right of qw quotes # (To avoid trimming qw quotes use -ntqw; the tokenizer handles this) elsif ( $type eq 'q' ) { $token =~ s/\s*$//; note_embedded_tab() if ( $token =~ "\t" ); } #-------------------------------------------------------- # End of section to patch token text #-------------------------------------------------------- # insert any needed whitespace if ( ( $type ne 'b' ) && ( $max_index_to_go >= 0 ) && ( $types_to_go[$max_index_to_go] ne 'b' ) && $rOpts_add_whitespace ) { my $ws = $$rwhite_space_flag[$j]; if ( $ws == 1 ) { insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); } } # Do not allow breaks which would promote a side comment to a # block comment. In order to allow a break before an opening # or closing BLOCK, followed by a side comment, those sections # of code will handle this flag separately. my $side_comment_follows = ( $next_nonblank_token_type eq '#' ); my $is_opening_BLOCK = ( $type eq '{' && $token eq '{' && $block_type && $block_type ne 't' ); my $is_closing_BLOCK = ( $type eq '}' && $token eq '}' && $block_type && $block_type ne 't' ); if ( $side_comment_follows && !$is_opening_BLOCK && !$is_closing_BLOCK ) { $no_internal_newlines = 1; } # We're only going to handle breaking for code BLOCKS at this # (top) level. Other indentation breaks will be handled by # sub scan_list, which is better suited to dealing with them. if ($is_opening_BLOCK) { # Tentatively output this token. This is required before # calling starting_one_line_block. 
We may have to unstore # it, though, if we have to break before it. store_token_to_go($side_comment_follows); # Look ahead to see if we might form a one-line block my $too_long = starting_one_line_block( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type, $rblock_type ); clear_breakpoint_undo_stack(); # to simplify the logic below, set a flag to indicate if # this opening brace is far from the keyword which introduces it my $keyword_on_same_line = 1; if ( ( $max_index_to_go >= 0 ) && ( $last_nonblank_type eq ')' ) ) { if ( $block_type =~ /^(if|else|elsif)$/ && ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) { $keyword_on_same_line = 1; } elsif ( ( $slevel < $nesting_depth_to_go[0] ) || $too_long ) { $keyword_on_same_line = 0; } } # decide if user requested break before '{' my $want_break = # use -bl flag if not a sub block of any type $block_type !~ /^sub/ ? $rOpts->{'opening-brace-on-new-line'} # use -sbl flag for a named sub block : $block_type !~ /^sub\W*$/ ? $rOpts->{'opening-sub-brace-on-new-line'} # use -asbl flag for an anonymous sub block : $rOpts->{'opening-anonymous-sub-brace-on-new-line'}; # Break before an opening '{' ... 
if ( # if requested $want_break # and we were unable to start looking for a block, && $index_start_one_line_block == UNDEFINED_INDEX # or if it will not be on same line as its keyword, so that # it will be outdented (eval.t, overload.t), and the user # has not insisted on keeping it on the right || ( !$keyword_on_same_line && !$rOpts->{'opening-brace-always-on-right'} ) ) { # but only if allowed unless ($no_internal_newlines) { # since we already stored this token, we must unstore it unstore_token_to_go(); # then output the line output_line_to_go(); # and now store this token at the start of a new line store_token_to_go($side_comment_follows); } } # Now update for side comment if ($side_comment_follows) { $no_internal_newlines = 1 } # now output this line unless ($no_internal_newlines) { output_line_to_go(); } } elsif ($is_closing_BLOCK) { # If there is a pending one-line block .. if ( $index_start_one_line_block != UNDEFINED_INDEX ) { # we have to terminate it if.. if ( # it is too long (final length may be different from # initial estimate). note: must allow 1 space for this token excess_line_length( $index_start_one_line_block, $max_index_to_go ) >= 0 # or if it has too many semicolons || ( $semicolons_before_block_self_destruct == 0 && $last_nonblank_type ne ';' ) ) { destroy_one_line_block(); } } # put a break before this closing curly brace if appropriate unless ( $no_internal_newlines || $index_start_one_line_block != UNDEFINED_INDEX ) { # add missing semicolon if ... 
# there are some tokens if ( ( $max_index_to_go > 0 ) # and we don't have one && ( $last_nonblank_type ne ';' ) # patch until some block type issues are fixed: # Do not add semi-colon for block types '{', # '}', and ';' because we cannot be sure yet # that this is a block and not an anonomyous # hash (blktype.t, blktype1.t) && ( $block_type !~ /^[\{\};]$/ ) # patch: and do not add semi-colons for recently # added block types (see tmp/semicolon.t) && ( $block_type !~ /^(switch|case|given|when|default)$/) # it seems best not to add semicolons in these # special block types: sort|map|grep && ( !$is_sort_map_grep{$block_type} ) # and we are allowed to do so. && $rOpts->{'add-semicolons'} ) { save_current_token(); $token = ';'; $type = ';'; $level = $levels_to_go[$max_index_to_go]; $slevel = $nesting_depth_to_go[$max_index_to_go]; $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; $ci_level = $ci_levels_to_go[$max_index_to_go]; $block_type = ""; $container_type = ""; $container_environment = ""; $type_sequence = ""; # Note - we remove any blank AFTER extracting its # parameters such as level, etc, above if ( $types_to_go[$max_index_to_go] eq 'b' ) { unstore_token_to_go(); } store_token_to_go(); note_added_semicolon(); restore_current_token(); } # then write out everything before this closing curly brace output_line_to_go(); } # Now update for side comment if ($side_comment_follows) { $no_internal_newlines = 1 } # store the closing curly brace store_token_to_go(); # ok, we just stored a closing curly brace. Often, but # not always, we want to end the line immediately. # So now we have to check for special cases. # if this '}' successfully ends a one-line block.. my $is_one_line_block = 0; my $keep_going = 0; if ( $index_start_one_line_block != UNDEFINED_INDEX ) { # Remember the type of token just before the # opening brace. It would be more general to use # a stack, but this will work for one-line blocks. 
$is_one_line_block = $types_to_go[$index_start_one_line_block]; # we have to actually make it by removing tentative # breaks that were set within it undo_forced_breakpoint_stack(0); set_nobreaks( $index_start_one_line_block, $max_index_to_go - 1 ); # then re-initialize for the next one-line block destroy_one_line_block(); # then decide if we want to break after the '}' .. # We will keep going to allow certain brace followers as in: # do { $ifclosed = 1; last } unless $losing; # # But make a line break if the curly ends a # significant block: if ( $is_block_without_semicolon{$block_type} # if needless semicolon follows we handle it later && $next_nonblank_token ne ';' ) { output_line_to_go() unless ($no_internal_newlines); } } # set string indicating what we need to look for brace follower # tokens if ( $block_type eq 'do' ) { $rbrace_follower = \%is_do_follower; } elsif ( $block_type =~ /^(if|elsif|unless)$/ ) { $rbrace_follower = \%is_if_brace_follower; } elsif ( $block_type eq 'else' ) { $rbrace_follower = \%is_else_brace_follower; } # added eval for borris.t elsif ($is_sort_map_grep_eval{$block_type} || $is_one_line_block eq 'G' ) { $rbrace_follower = undef; $keep_going = 1; } # anonymous sub elsif ( $block_type =~ /^sub\W*$/ ) { if ($is_one_line_block) { $rbrace_follower = \%is_anon_sub_1_brace_follower; } else { $rbrace_follower = \%is_anon_sub_brace_follower; } } # None of the above: specify what can follow a closing # brace of a block which is not an # if/elsif/else/do/sort/map/grep/eval # Testfiles: # 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl', 'break1.t else { $rbrace_follower = \%is_other_brace_follower; } # See if an elsif block is followed by another elsif or else; # complain if not. if ( $block_type eq 'elsif' ) { if ( $next_nonblank_token_type eq 'b' ) { # end of line? 
$looking_for_else = 1; # ok, check on next line } else { unless ( $next_nonblank_token =~ /^(elsif|else)$/ ) { write_logfile_entry("No else block :(\n"); } } } # keep going after certain block types (map,sort,grep,eval) # added eval for borris.t if ($keep_going) { # keep going } # if no more tokens, postpone decision until re-entring elsif ( ( $next_nonblank_token_type eq 'b' ) && $rOpts_add_newlines ) { unless ($rbrace_follower) { output_line_to_go() unless ($no_internal_newlines); } } elsif ($rbrace_follower) { unless ( $rbrace_follower->{$next_nonblank_token} ) { output_line_to_go() unless ($no_internal_newlines); } $rbrace_follower = undef; } else { output_line_to_go() unless ($no_internal_newlines); } } # end treatment of closing block token # handle semicolon elsif ( $type eq ';' ) { # kill one-line blocks with too many semicolons $semicolons_before_block_self_destruct--; if ( ( $semicolons_before_block_self_destruct < 0 ) || ( $semicolons_before_block_self_destruct == 0 && $next_nonblank_token_type !~ /^[b\}]$/ ) ) { destroy_one_line_block(); } # Remove unnecessary semicolons, but not after bare # blocks, where it could be unsafe if the brace is # mistokenized. 
if ( ( $last_nonblank_token eq '}' && ( $is_block_without_semicolon{ $last_nonblank_block_type} || $last_nonblank_block_type =~ /^sub\s+\w/ || $last_nonblank_block_type =~ /^\w+:$/ ) ) || $last_nonblank_type eq ';' ) { if ( $rOpts->{'delete-semicolons'} # don't delete ; before a # because it would promote it # to a block comment && ( $next_nonblank_token_type ne '#' ) ) { note_deleted_semicolon(); output_line_to_go() unless ( $no_internal_newlines || $index_start_one_line_block != UNDEFINED_INDEX ); next; } else { write_logfile_entry("Extra ';'\n"); } } store_token_to_go(); output_line_to_go() unless ( $no_internal_newlines || ( $rOpts_keep_interior_semicolons && $j < $jmax ) || ( $next_nonblank_token eq '}' ) ); } # handle here_doc target string elsif ( $type eq 'h' ) { $no_internal_newlines = 1; # no newlines after seeing here-target destroy_one_line_block(); store_token_to_go(); } # handle all other token types else { # if this is a blank... if ( $type eq 'b' ) { # make it just one character $token = ' ' if $rOpts_add_whitespace; # delete it if unwanted by whitespace rules # or we are deleting all whitespace my $ws = $$rwhite_space_flag[ $j + 1 ]; if ( ( defined($ws) && $ws == -1 ) || $rOpts_delete_old_whitespace ) { # unless it might make a syntax error next unless is_essential_whitespace( $last_last_nonblank_token, $last_last_nonblank_type, $tokens_to_go[$max_index_to_go], $types_to_go[$max_index_to_go], $$rtokens[ $j + 1 ], $$rtoken_type[ $j + 1 ] ); } } store_token_to_go(); } # remember two previous nonblank OUTPUT tokens if ( $type ne '#' && $type ne 'b' ) { $last_last_nonblank_token = $last_nonblank_token; $last_last_nonblank_type = $last_nonblank_type; $last_nonblank_token = $token; $last_nonblank_type = $type; $last_nonblank_block_type = $block_type; } # unset the continued-quote flag since it only applies to the # first token, and we want to resume normal formatting if # there are additional tokens on the line $in_continued_quote = 0; } # end of loop 
# over all tokens in this 'line_of_tokens'

        # we have to flush ..
        if (

            # if there is a side comment
            ( ( $type eq '#' ) && !$rOpts->{'delete-side-comments'} )

            # if this line ends in a quote
            # NOTE: This is critically important for ensuring that quoted lines
            # do not get processed by things like -sot and -sct
            || $in_quote

            # if this is a VERSION statement
            || $is_VERSION_statement

            # to keep a label on one line if that is how it is now
            || ( ( $type eq 'J' ) && ( $max_index_to_go == 0 ) )

            # if we are instructed to keep all old line breaks
            || !$rOpts->{'delete-old-newlines'}
          )
        {
            destroy_one_line_block();
            output_line_to_go();
        }

        # mark old line breakpoints in current output stream
        if ( $max_index_to_go >= 0 && !$rOpts_ignore_old_breakpoints ) {
            $old_breakpoint_to_go[$max_index_to_go] = 1;
        }
    }    # end sub print_line_of_tokens
}    # end print_line_of_tokens

# sub output_line_to_go sends one logical line of tokens on down the
# pipeline to the VerticalAligner package, breaking the line into continuation
# lines as necessary.  The line of tokens is ready to go in the "to_go"
# arrays.
#
# It takes no arguments and works on the package-level "to_go" arrays and
# option variables.  If a one-line block is pending it only records a forced
# breakpoint at the last stored token and returns without writing anything.
# Otherwise it decides whether a blank line should precede the batch, chooses
# the break points, hands the resulting line fragments to
# send_lines_to_vertical_aligner, and finally calls
# prepare_for_new_input_lines.
sub output_line_to_go {

    # debug stuff; this routine can be called from many points
    FORMATTER_DEBUG_FLAG_OUTPUT && do {
        my ( $a, $b, $c ) = caller;
        write_diagnostics(
"OUTPUT: output_line_to_go called: $a $c $last_nonblank_type $last_nonblank_token, one_line=$index_start_one_line_block, tokens to write=$max_index_to_go\n"
        );
        my $output_str = join "", @tokens_to_go[ 0 .. $max_index_to_go ];
        write_diagnostics("$output_str\n");
    };

    # just set a tentative breakpoint if we might be in a one-line block
    if ( $index_start_one_line_block != UNDEFINED_INDEX ) {
        set_forced_breakpoint($max_index_to_go);
        return;
    }

    # a closing side comment is only built when -csc is set and the batch
    # is not empty; it is written at the very end of this routine
    my $cscw_block_comment;
    $cscw_block_comment = add_closing_side_comment()
      if ( $rOpts->{'closing-side-comments'} && $max_index_to_go >= 0 );

    match_opening_and_closing_tokens();

    # tell the -lp option we are outputting a batch so it can close
    # any unfinished items in its stack
    finish_lp_batch();

    # If this line ends in a code block brace, set breaks at any
    # previous closing code block braces to breakup a chain of code
    # blocks on one line.  This is very rare but can happen for
    # user-defined subs.  For example we might be looking at this:
    #  BOOL { $server_data{uptime} > 0; } NUM { $server_data{load}; } STR {
    my $saw_good_break = 0;    # flag to force breaks even if short line
    if (

        # looking for opening or closing block brace
        $block_type_to_go[$max_index_to_go]

        # but not one of these which are never duplicated on a line:
        # until|while|for|if|elsif|else
        && !$is_block_without_semicolon{ $block_type_to_go[$max_index_to_go] }
      )
    {
        my $lev = $nesting_depth_to_go[$max_index_to_go];

        # Walk backwards from the end and
        # set break at any closing block braces at the same level.
        # But quit if we are not in a chain of blocks.
        for ( my $i = $max_index_to_go - 1 ; $i >= 0 ; $i-- ) {
            last if ( $levels_to_go[$i] < $lev );    # stop at a lower level
            next if ( $levels_to_go[$i] > $lev );    # skip past higher level

            if ( $block_type_to_go[$i] ) {
                if ( $tokens_to_go[$i] eq '}' ) {
                    set_forced_breakpoint($i);
                    $saw_good_break = 1;
                }
            }

            # quit if we see anything besides words, function, blanks
            # at this level
            elsif ( $types_to_go[$i] !~ /^[\(\)Gwib]$/ ) { last }
        }
    }

    my $imin = 0;
    my $imax = $max_index_to_go;

    # trim any blank tokens
    if ( $max_index_to_go >= 0 ) {
        if ( $types_to_go[$imin] eq 'b' ) { $imin++ }
        if ( $types_to_go[$imax] eq 'b' ) { $imax-- }
    }

    # anything left to write?
    if ( $imin <= $imax ) {

        # add a blank line before certain key types
        if ( $last_line_leading_type !~ /^[#b]/ ) {
            my $want_blank    = 0;
            my $leading_token = $tokens_to_go[$imin];
            my $leading_type  = $types_to_go[$imin];

            # blank lines before subs except declarations and one-liners
            # MCONVERSION LOCATION - for sub tokenization change
            if ( $leading_token =~ /^(sub\s)/ && $leading_type eq 'i' ) {
                $want_blank = ( $rOpts->{'blanks-before-subs'} )
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) !~ /^[\;\}]$/
                  );
            }

            # break before all package declarations
            # MCONVERSION LOCATION - for tokenization change
            elsif ($leading_token =~ /^(package\s)/
                && $leading_type eq 'i' )
            {
                $want_blank = ( $rOpts->{'blanks-before-subs'} );
            }

            # break before certain key blocks except one-liners
            # (note: this is a plain 'if', so it starts a second chain that
            # is evaluated even when one of the tests above matched; the
            # leading-token patterns of the two chains cannot both match)
            if ( $leading_token =~ /^(BEGIN|END)$/ && $leading_type eq 'k' ) {
                $want_blank = ( $rOpts->{'blanks-before-subs'} )
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) ne '}'
                  );
            }

            # Break before certain block types if we haven't had a
            # break at this level for a while.  This is the
            # difficult decision..
            elsif ($leading_token =~ /^(unless|if|while|until|for|foreach)$/
                && $leading_type eq 'k' )
            {
                my $lc = $nonblank_lines_at_depth[$last_line_leading_level];
                if ( !defined($lc) ) { $lc = 0 }

                $want_blank =
                     $rOpts->{'blanks-before-blocks'}
                  && $lc >= $rOpts->{'long-block-line-count'}
                  && $file_writer_object->get_consecutive_nonblank_lines() >=
                  $rOpts->{'long-block-line-count'}
                  && (
                    terminal_type( \@types_to_go, \@block_type_to_go, $imin,
                        $imax ) ne '}'
                  );
            }

            if ($want_blank) {

                # future: send blank line down normal path to VerticalAligner
                Perl::Tidy::VerticalAligner::flush();
                $file_writer_object->write_blank_code_line();
            }
        }

        # update blank line variables and count number of consecutive
        # non-blank, non-comment lines at this level
        $last_last_line_leading_level = $last_line_leading_level;
        $last_line_leading_level      = $levels_to_go[$imin];
        if ( $last_line_leading_level < 0 ) { $last_line_leading_level = 0 }
        $last_line_leading_type = $types_to_go[$imin];
        if (   $last_line_leading_level == $last_last_line_leading_level
            && $last_line_leading_type ne 'b'
            && $last_line_leading_type ne '#'
            && defined( $nonblank_lines_at_depth[$last_line_leading_level] ) )
        {
            $nonblank_lines_at_depth[$last_line_leading_level]++;
        }
        else {

            # level changed, or a blank/comment line, or first line seen at
            # this level: restart the count
            $nonblank_lines_at_depth[$last_line_leading_level] = 1;
        }

        FORMATTER_DEBUG_FLAG_FLUSH && do {
            my ( $package, $file, $line ) = caller;
            print
"FLUSH: flushing from $package $file $line, types= $types_to_go[$imin] to $types_to_go[$imax]\n";
        };

        # add a couple of extra terminal blank tokens
        pad_array_to_go();

        # set all forced breakpoints for good list formatting
        my $is_long_line = excess_line_length( $imin, $max_index_to_go ) > 0;

        if (
            $max_index_to_go > 0
            && (
                   $is_long_line
                || $old_line_count_in_batch > 1
                || is_unbalanced_batch()
                || (
                    $comma_count_in_batch
                    && (   $rOpts_maximum_fields_per_table > 0
                        || $rOpts_comma_arrow_breakpoints == 0 )
                )
            )
          )
        {
            $saw_good_break ||= scan_list();
        }

        # let $ri_first and $ri_last be references to lists of
        # first and last tokens of line fragments to output..
        my ( $ri_first, $ri_last );

        # write a single line if..
        if (

            # we aren't allowed to add any newlines
            !$rOpts_add_newlines

            # or, we don't already have an interior breakpoint
            # and we didn't see a good breakpoint
            || (
                   !$forced_breakpoint_count
                && !$saw_good_break

                # and this line is 'short'
                && !$is_long_line
            )
          )
        {
            @$ri_first = ($imin);
            @$ri_last  = ($imax);
        }

        # otherwise use multiple lines
        else {

            ( $ri_first, $ri_last, my $colon_count ) =
              set_continuation_breaks($saw_good_break);

            break_all_chain_tokens( $ri_first, $ri_last );

            break_equals( $ri_first, $ri_last );

            # now we do a correction step to clean this up a bit
            # (The only time we would not do this is for debugging)
            if ( $rOpts->{'recombine'} ) {
                ( $ri_first, $ri_last ) =
                  recombine_breakpoints( $ri_first, $ri_last );
            }

            insert_final_breaks( $ri_first, $ri_last ) if $colon_count;
        }

        # do corrector step if -lp option is used
        my $do_not_pad = 0;
        if ($rOpts_line_up_parentheses) {
            $do_not_pad = correct_lp_indentation( $ri_first, $ri_last );
        }
        send_lines_to_vertical_aligner( $ri_first, $ri_last, $do_not_pad );
    }
    prepare_for_new_input_lines();

    # output any new -cscw block comment
    if ($cscw_block_comment) {
        flush();
        $file_writer_object->write_code_line( $cscw_block_comment . "\n" );
    }
}

# Record that a semicolon was added at the current input line: remember the
# first and the most recent line numbers, bump the counter, and note it in
# the logfile.
sub note_added_semicolon {
    $last_added_semicolon_at = $input_line_number;
    if ( $added_semicolon_count == 0 ) {
        $first_added_semicolon_at = $last_added_semicolon_at;
    }
    $added_semicolon_count++;
    write_logfile_entry("Added ';' here\n");
}

# Record that a semicolon was deleted at the current input line, using the
# same bookkeeping as note_added_semicolon.
sub note_deleted_semicolon {
    $last_deleted_semicolon_at = $input_line_number;
    if ( $deleted_semicolon_count == 0 ) {
        $first_deleted_semicolon_at = $last_deleted_semicolon_at;
    }
    $deleted_semicolon_count++;
    write_logfile_entry("Deleted unnecessary ';'\n");    # i hope ;)
}

# Record an embedded tab at the current input line.  The logfile message is
# written only while the running count is at most MAX_NAG_MESSAGES.
sub note_embedded_tab {
    $embedded_tab_count++;
    $last_embedded_tab_at = $input_line_number;
    if ( !$first_embedded_tab_at ) {
        $first_embedded_tab_at = $last_embedded_tab_at;
    }

    if ( $embedded_tab_count <= MAX_NAG_MESSAGES ) {
        write_logfile_entry("Embedded tabs in quote or pattern\n");
    }
}

sub starting_one_line_block {

    # after seeing an opening curly brace, look for the closing brace
    # and see if the entire block will fit on a line.  This routine is
    # not always right because it uses the old whitespace, so a check
    # is made later (at the closing brace) to make sure we really
    # have a one-line block.  We have to do this preliminary check,
    # though, because otherwise we would always break at a semicolon
    # within a one-line block if the block contains multiple statements.
my ( $j, $jmax, $level, $slevel, $ci_level, $rtokens, $rtoken_type,
        $rblock_type )
      = @_;

    # Only one pending one-line block can exist at a time, so drop any
    # block that is currently being tracked.
    destroy_one_line_block();

    # return value:
    #  1=distance from start of block to opening brace exceeds line length
    #  0=otherwise

    my $i_start = 0;

    if ( $max_index_to_go < 0 ) {

        # shouldn't happen: there must have been a prior call to
        # store_token_to_go to put the opening brace in the output stream
        warning("program bug: store_token_to_go called incorrectly\n");
        report_definite_bug();
    }

    # cannot use one-line blocks with cuddled else/elsif lines
    elsif ( ( $tokens_to_go[0] eq '}' ) && $rOpts_cuddled_else ) {
        return 0;
    }

    my $block_type = $rblock_type->[$j];

    # find the starting keyword for this block (such as 'if', 'else', ...)
    if ( $block_type =~ /^[\{\}\;\:]$/ ) {
        $i_start = $max_index_to_go;
    }
    elsif ( $last_last_nonblank_token_to_go eq ')' ) {

        # For something like "if (xxx) {", the keyword "if" will be
        # just after the most recent break. This will be 0 unless
        # we have just killed a one-line block and are starting another.
        # (doif.t)
        $i_start = $index_max_forced_break + 1;
        $i_start++ if ( $types_to_go[$i_start] eq 'b' );
        return 0   if ( $tokens_to_go[$i_start] ne $block_type );
    }

    # the previous nonblank token should start these block types
    elsif (
        ( $last_last_nonblank_token_to_go eq $block_type )
        || (   $block_type =~ /^sub/
            && $last_last_nonblank_token_to_go =~ /^sub/ )
      )
    {
        $i_start = $last_last_nonblank_index_to_go;
    }

    # patch for SWITCH/CASE to retain one-line case/when blocks
    elsif ( $block_type eq 'case' || $block_type eq 'when' ) {
        $i_start = $index_max_forced_break + 1;
        $i_start++ if ( $types_to_go[$i_start] eq 'b' );
        return 0   if ( $tokens_to_go[$i_start] ne $block_type );
    }
    else {
        return 1;
    }

    # length of the text from the starting keyword through the opening brace
    my $pos = total_line_length( $i_start, $max_index_to_go ) - 1;

    # see if length is too long to even start
    return 1 if ( $pos > $rOpts_maximum_line_length );

    # scan the remaining input tokens of this line for the closing brace
    foreach my $i ( $j + 1 .. $jmax ) {

        # old whitespace could be arbitrarily large, so count any blank
        # as a single space
        $pos += ( $rtoken_type->[$i] eq 'b' ) ? 1 : length( $rtokens->[$i] );

        # Give up if we exceed the maximum line length,
        return 0 if ( $pos > $rOpts_maximum_line_length );

        # or encounter another opening block brace before finding the
        # closing brace.
        return 0
          if ( $rtokens->[$i] eq '{'
            && $rtoken_type->[$i] eq '{'
            && $rblock_type->[$i] );

        # just keep going for anything but our closing block brace
        next
          unless ( $rtokens->[$i] eq '}'
            && $rtoken_type->[$i] eq '}'
            && $rblock_type->[$i] );

        # we found our closing brace:
        # be sure any trailing comment also fits on the line
        my $i_nonblank = $i + 1;
        $i_nonblank++ if ( $rtoken_type->[ $i + 1 ] eq 'b' );

        if ( $rtoken_type->[$i_nonblank] eq '#' ) {
            $pos += length( $rtokens->[$i_nonblank] );

            # include the blank which separates the brace and the comment
            $pos += length( $rtokens->[ $i + 1 ] )
              if ( $i_nonblank > $i + 1 );

            return 0 if ( $pos > $rOpts_maximum_line_length );
        }

        # ok, it's a one-line block
        create_one_line_block( $i_start, 20 );
        return 0;
    }

    # Allow certain types of new one-line blocks to form by joining
    # input lines.  These can be safely done, but for other block types,
    # we keep old one-line blocks but do not form new ones. It is not
    # always a good idea to make as many one-line blocks as possible,
    # so other types are not done.  The user can always use -mangle.
    create_one_line_block( $i_start, 1 )
      if ( $is_sort_map_grep_eval{$block_type} );

    return 0;
}

sub unstore_token_to_go {

    # remove most recent token from output stream; removing the only
    # remaining token (or calling this on an empty batch) leaves the
    # index at UNDEFINED_INDEX
    $max_index_to_go =
      ( $max_index_to_go > 0 ) ? $max_index_to_go - 1 : UNDEFINED_INDEX;
    return;
}

sub want_blank_line {

    # flush pending output, then forward the request to the file writer
    flush();
    $file_writer_object->want_blank_line();
}

sub write_unindented_line {

    # flush pending output, then write the given line via the file writer
    my ($text) = @_;
    flush();
    $file_writer_object->write_line($text);
}

sub undo_ci {

    # Undo continuation indentation in certain sequences
    # For example, we can undo continuation indentation in sort/map/grep chains
    #    my $dat1 = pack( "n*",
    #        map { $_, $lookup->{$_} }
    #          sort { $a <=> $b }
    #          grep { $lookup->{$_} ne $default } keys %$lookup );
    # To align the map/sort/grep keywords like this:
    #    my $dat1 = pack( "n*",
    #        map { $_, $lookup->{$_} }
    #        sort { $a <=> $b }
    #        grep { $lookup->{$_} ne $default } keys %$lookup );
    my ( $ri_first, $ri_last ) = @_;
    my ( $line_1, $line_2, $lev_last );
    my $this_line_is_semicolon_terminated;
    my $max_line = @$ri_first - 1;

    # looking at each line of this batch..
    # We are looking at leading tokens and looking for a sequence
    # all at the same level and higher level than enclosing lines.
    foreach my $line ( 0 ..
$max_line ) {

        my $ibeg = $$ri_first[$line];
        my $lev  = $levels_to_go[$ibeg];
        if ( $line > 0 ) {

            # if we have started a chain..
            if ($line_1) {

                # see if it continues..
                if ( $lev == $lev_last ) {
                    if (   $types_to_go[$ibeg] eq 'k'
                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
                    {

                        # chain continues...
                        # check for chain ending at end of a statement
                        if ( $line == $max_line ) {

                            # see if this line ends a statement
                            my $iend = $$ri_last[$line];
                            $this_line_is_semicolon_terminated =
                              $types_to_go[$iend] eq ';'

                              # with possible side comment
                              || ( $types_to_go[$iend] eq '#'
                                && $iend - $ibeg >= 2
                                && $types_to_go[ $iend - 2 ] eq ';'
                                && $types_to_go[ $iend - 1 ] eq 'b' );
                        }
                        $line_2 = $line
                          if ($this_line_is_semicolon_terminated);
                    }
                    else {

                        # kill chain
                        $line_1 = undef;
                    }
                }
                elsif ( $lev < $lev_last ) {

                    # chain ends with previous line
                    $line_2 = $line - 1;
                }
                elsif ( $lev > $lev_last ) {

                    # kill chain
                    $line_1 = undef;
                }

                # undo the continuation indentation if a chain ends
                if ( defined($line_2) && defined($line_1) ) {
                    my $continuation_line_count = $line_2 - $line_1 + 1;

                    # $line_2 is never cleared, so a value left over from
                    # an earlier chain can be smaller than the current
                    # $line_1.  The line range is then empty and there is
                    # nothing to undo; skipping the assignments avoids
                    # evaluating '(0) x N' with a negative N, which warns
                    # on newer perls.
                    # NOTE(review): ending the new chain on a stale $line_2
                    # looks unintended, but is left as is because changing
                    # it would change formatting output - confirm.
                    if ( $continuation_line_count > 0 ) {
                        @ci_levels_to_go[ @$ri_first[ $line_1 .. $line_2 ] ]
                          = (0) x ($continuation_line_count);
                        @leading_spaces_to_go[ @$ri_first
                          [ $line_1 .. $line_2 ] ] =
                          @reduced_spaces_to_go[ @$ri_first
                          [ $line_1 .. $line_2 ] ];
                    }
                    $line_1 = undef;
                }
            }

            # not in a chain yet..
            else {

                # look for start of a new sort/map/grep chain
                if ( $lev > $lev_last ) {
                    if (   $types_to_go[$ibeg] eq 'k'
                        && $is_sort_map_grep{ $tokens_to_go[$ibeg] } )
                    {
                        $line_1 = $line;
                    }
                }
            }
        }
        $lev_last = $lev;
    }
}

sub undo_lp_ci {

    # If there is a single, long parameter within parens, like this:
    #
    #  $self->command( "/msg "
    #        . $infoline->chan
    #        . " You said $1, but did you know that it's square was "
    #        . $1 * $1 . " ?" );
    #
    # we can remove the continuation indentation of the 2nd and higher lines
    # to achieve this effect, which is more pleasing:
    #
    #  $self->command("/msg "
    #                 . $infoline->chan
    #                 . " You said $1, but did you know that it's square was "
    #                 . $1 * $1 . " ?");
    #
    # Arguments:
    #   $line_open     - index (into @$ri_first) of the line holding the
    #                    opening token
    #   $i_start       - token index whose level and ci level are the
    #                    reference values
    #   $closing_index - token index of the closing token
    #   $ri_first, $ri_last - refs to the lists of first and last token
    #                    indexes of each output line in this batch
    # Returns nothing useful; on success it zeroes the ci level and swaps in
    # the reduced leading spaces for the first token of each affected line.

    my ( $line_open, $i_start, $closing_index, $ri_first, $ri_last ) = @_;
    my $max_line = @$ri_first - 1;

    # must be multiple lines
    return unless $max_line > $line_open;

    my $lev_start     = $levels_to_go[$i_start];
    my $ci_start_plus = 1 + $ci_levels_to_go[$i_start];

    # see if all additional lines in this container have continuation
    # indentation
    my $n;
    my $line_1 = 1 + $line_open;
    for ( $n = $line_1 ; $n <= $max_line ; ++$n ) {
        my $ibeg = $$ri_first[$n];
        my $iend = $$ri_last[$n];

        # a line which starts with the closing token is not included;
        # both values are token indexes, so compare them numerically
        if ( $ibeg == $closing_index ) { $n--; last }
        return if ( $lev_start != $levels_to_go[$ibeg] );
        return if ( $ci_start_plus != $ci_levels_to_go[$ibeg] );
        last   if ( $closing_index <= $iend );
    }

    # we can reduce the indentation of all continuation lines
    my $continuation_line_count = $n - $line_open;
    @ci_levels_to_go[ @$ri_first[ $line_1 .. $n ] ] =
      (0) x ($continuation_line_count);
    @leading_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ] =
      @reduced_spaces_to_go[ @$ri_first[ $line_1 .. $n ] ];
}

sub set_logical_padding {

    # Look at a batch of lines and see if extra padding can improve the
    # alignment when there are certain leading operators. Here is an
    # example, in which some extra space is introduced before
    # '( $year' to make it line up with the subsequent lines:
    #
    #       if (   ( $Year < 1601 )
    #           || ( $Year > 2899 )
    #           || ( $EndYear < 1601 )
    #           || ( $EndYear > 2899 ) )
    #       {
    #           &Error_OutOfRange;
    #       }
    #
    my ( $ri_first, $ri_last ) = @_;
    my $max_line = @$ri_first - 1;

    my ( $ibeg, $ibeg_next, $ibegm, $iend, $iendm, $ipad, $line, $pad_spaces,
        $tok_next, $type_next, $has_leading_op_next, $has_leading_op );

    # looking at each line of this batch..
    foreach $line ( 0 .. $max_line - 1 ) {

        # see if the next line begins with a logical operator
        $ibeg      = $$ri_first[$line];
        $iend      = $$ri_last[$line];
        $ibeg_next = $$ri_first[ $line + 1 ];
        $tok_next  = $tokens_to_go[$ibeg_next];
        $type_next = $types_to_go[$ibeg_next];

        $has_leading_op_next = ( $tok_next =~ /^\w/ )
          ? $is_chain_operator{$tok_next}    # + - * / : ?
# && ||   (the operator list in the comment just above continues here)
          : $is_chain_operator{$type_next};    # and, or

        next unless ($has_leading_op_next);

        # next line must not be at lesser depth
        next
          if ( $nesting_depth_to_go[$ibeg] > $nesting_depth_to_go[$ibeg_next] );

        # identify the token in this line to be padded on the left
        $ipad = undef;

        # handle lines at same depth...
        if ( $nesting_depth_to_go[$ibeg] == $nesting_depth_to_go[$ibeg_next] )
        {

            # if this is not first line of the batch ...
            if ( $line > 0 ) {

                # and we have leading operator..
                next if $has_leading_op;

                # Introduce padding if..
                # 1. the previous line is at lesser depth, or
                # 2. the previous line ends in an assignment
                # 3. the previous line ends in a 'return'
                # 4. the previous line ends in a comma
                # Example 1: previous line at lesser depth
                #       if (   ( $Year < 1601 )      # <- we are here but
                #           || ( $Year > 2899 )      #  list has not yet
                #           || ( $EndYear < 1601 )   # collapsed vertically
                #           || ( $EndYear > 2899 ) )
                #       {
                #
                # Example 2: previous line ending in assignment:
                #    $leapyear =
                #        $year % 4   ? 0     # <- We are here
                #      : $year % 100 ? 1
                #      : $year % 400 ? 0
                #      : 1;
                #
                # Example 3: previous line ending in comma:
                #    push @expr,
                #        /test/   ? undef
                #      : eval($_) ? 1
                #      : eval($_) ? 1
                #      :            0;

                # be sure levels agree (do not indent after an indented 'if')
                next if ( $levels_to_go[$ibeg] ne $levels_to_go[$ibeg_next] );

                # allow padding on first line after a comma but only if:
                # (1) this is line 2 and
                # (2) there are more than three lines and
                # (3) lines 3 and 4 have the same leading operator
                # These rules try to prevent padding within a long
                # comma-separated list.
                my $ok_comma;
                if (   $types_to_go[$iendm] eq ','
                    && $line == 1
                    && $max_line > 2 )
                {
                    my $ibeg_next_next = $$ri_first[ $line + 2 ];
                    my $tok_next_next  = $tokens_to_go[$ibeg_next_next];
                    $ok_comma = $tok_next_next eq $tok_next;
                }

                next
                  unless (
                       $is_assignment{ $types_to_go[$iendm] }
                    || $ok_comma
                    || ( $nesting_depth_to_go[$ibegm] <
                        $nesting_depth_to_go[$ibeg] )
                    || (   $types_to_go[$iendm] eq 'k'
                        && $tokens_to_go[$iendm] eq 'return' )
                  );

                # we will add padding before the first token
                $ipad = $ibeg;
            }

            # for first line of the batch..
            else {

                # WARNING: Never indent if first line is starting in a
                # continued quote, which would change the quote.
                next if $starting_in_quote;

                # if this is text after closing '}'
                # then look for an interior token to pad
                if ( $types_to_go[$ibeg] eq '}' ) {

                }

                # otherwise, we might pad if it looks really good
                else {

                    # we might pad token $ibeg, so be sure that it
                    # is at the same depth as the next line.
                    next
                      if ( $nesting_depth_to_go[$ibeg] !=
                        $nesting_depth_to_go[$ibeg_next] );

                    # We can pad on line 1 of a statement if at least 3
                    # lines will be aligned. Otherwise, it
                    # can look very confusing.

                    # We have to be careful not to pad if there are too few
                    # lines.  The current rule is:
                    # (1) in general we require at least 3 consecutive lines
                    # with the same leading chain operator token,
                    # (2) but an exception is that we only require two lines
                    # with leading colons if there are no more lines.  For
                    # example, the first $i in the following snippet would
                    # get padding by the second rule:
                    #
                    #   $i == 1 ? ( "First", "Color" )
                    # : $i == 2 ? ( "Then",  "Rarity" )
                    # :           ( "Then",  "Name" );

                    if ( $max_line > 1 ) {
                        my $leading_token = $tokens_to_go[$ibeg_next];
                        my $tokens_differ;

                        # never indent line 1 of a '.' series because
                        # previous line is most likely at same level.
                        # TODO: we should also look at the leading_spaces
                        # of the last output line and skip if it is same
                        # as this line.
                        next if ( $leading_token eq '.' );

                        my $count = 1;
                        foreach my $l ( 2 .. 3 ) {
                            last if ( $line + $l > $max_line );
                            my $ibeg_next_next = $$ri_first[ $line + $l ];
                            if ( $tokens_to_go[$ibeg_next_next] ne
                                $leading_token )
                            {
                                $tokens_differ = 1;
                                last;
                            }
                            $count++;
                        }
                        next if ($tokens_differ);
                        next if ( $count < 3 && $leading_token ne ':' );
                        $ipad = $ibeg;
                    }
                    else {
                        next;
                    }
                }
            }
        }

        # find interior token to pad if necessary
        if ( !defined($ipad) ) {

            for ( my $i = $ibeg ; ( $i < $iend ) && !$ipad ; $i++ ) {

                # find any unclosed container
                next
                  unless ( $type_sequence_to_go[$i]
                    && $mate_index_to_go[$i] > $iend );

                # find next nonblank token to pad
                $ipad = $i + 1;
                if ( $types_to_go[$ipad] eq 'b' ) {
                    $ipad++;
                    last if ( $ipad > $iend );
                }
            }

            # note: this leaves the loop over lines entirely
            last unless $ipad;
        }

        # next line must not be at greater depth
        my $iend_next = $$ri_last[ $line + 1 ];
        next
          if ( $nesting_depth_to_go[ $iend_next + 1 ] >
            $nesting_depth_to_go[$ipad] );

        # lines must be somewhat similar to be padded..
        my $inext_next = $ibeg_next + 1;
        if ( $types_to_go[$inext_next] eq 'b' ) {
            $inext_next++;
        }
        my $type      = $types_to_go[$ipad];
        my $type_next = $types_to_go[ $ipad + 1 ];

        # see if there are multiple continuation lines
        my $logical_continuation_lines = 1;
        if ( $line + 2 <= $max_line ) {
            my $leading_token  = $tokens_to_go[$ibeg_next];
            my $ibeg_next_next = $$ri_first[ $line + 2 ];
            if (   $tokens_to_go[$ibeg_next_next] eq $leading_token
                && $nesting_depth_to_go[$ibeg_next] eq
                $nesting_depth_to_go[$ibeg_next_next] )
            {
                $logical_continuation_lines++;
            }
        }

        # see if leading types match
        my $types_match = $types_to_go[$inext_next] eq $type;
        my $matches_without_bang;

        # if first line has leading ! then compare the following token
        if ( !$types_match && $type eq '!' ) {
            $types_match = $matches_without_bang =
              $types_to_go[$inext_next] eq $types_to_go[ $ipad + 1 ];
        }

        if (

            # either we have multiple continuation lines to follow
            # and we are not padding the first token
            ( $logical_continuation_lines > 1 && $ipad > 0 )

            # or..
            || (

                # types must match
                $types_match

                # and keywords must match if keyword
                && !(
                       $type eq 'k'
                    && $tokens_to_go[$ipad] ne $tokens_to_go[$inext_next]
                )
            )
          )
        {

            #----------------------begin special checks--------------
            #
            # SPECIAL CHECK 1:
            # A check is needed before we can make the pad.
            # If we are in a list with some long items, we want each
            # item to stand out.  So in the following example, the
            # first line beginning with '$casefold->' would look good
            # padded to align with the next line, but then it
            # would be indented more than the last line, so we
            # won't do it.
            #
            #  ok(
            #      $casefold->{code}         eq '0041'
            #        && $casefold->{status}  eq 'C'
            #        && $casefold->{mapping} eq '0061',
            #      'casefold 0x41'
            #  );
            #
            # Note:
            # It would be faster, and almost as good, to use a comma
            # count, and not pad if comma_count > 1 and the previous
            # line did not end with a comma.
            #
            my $ok_to_pad = 1;

            my $ibg   = $$ri_first[ $line + 1 ];
            my $depth = $nesting_depth_to_go[ $ibg + 1 ];

            # just use simplified formula for leading spaces to avoid
            # needless sub calls
            my $lsp = $levels_to_go[$ibg] + $ci_levels_to_go[$ibg];

            # look at each line beyond the next ..
            # $l must keep the index of the last line examined once the
            # loop is done.  A foreach loop variable is restored to its
            # previous value when the loop exits, so iterate with a
            # separate variable and copy it into $l on each pass.
            my $l = $line + 1;
            foreach my $ltest ( $line + 2 .. $max_line ) {
                $l = $ltest;
                my $ibg = $$ri_first[$l];

                # quit looking at the end of this container
                last
                  if ( $nesting_depth_to_go[ $ibg + 1 ] < $depth )
                  || ( $nesting_depth_to_go[$ibg] < $depth );

                # cannot do the pad if a later line would be
                # outdented more
                if ( $levels_to_go[$ibg] + $ci_levels_to_go[$ibg] < $lsp ) {
                    $ok_to_pad = 0;
                    last;
                }
            }

            # don't pad if we end in a broken list
            if ( $l == $max_line ) {
                my $i2 = $$ri_last[$l];
                if ( $types_to_go[$i2] eq '#' ) {
                    my $i1 = $$ri_first[$l];
                    next
                      if (
                        terminal_type( \@types_to_go, \@block_type_to_go, $i1,
                            $i2 ) eq ','
                      );
                }
            }

            # SPECIAL CHECK 2:
            # a minus may introduce a quoted variable, and we will
            # add the pad only if this line begins with a bare word,
            # such as for the word 'Button' here:
            #    [
            #         Button      => "Print letter \"~$_\"",
            #        -command     => [ sub { print "$_[0]\n" }, $_ ],
            #        -accelerator => "Meta+$_"
            #    ];
            #
            #  On the other hand, if 'Button' is quoted, it looks best
            #  not to pad:
            #    [
            #        'Button'     => "Print letter \"~$_\"",
            #        -command     => [ sub { print "$_[0]\n" }, $_ ],
            #        -accelerator => "Meta+$_"
            #    ];
            if ( $types_to_go[$ibeg_next] eq 'm' ) {
                $ok_to_pad = 0 if $types_to_go[$ibeg] eq 'Q';
            }

            next unless $ok_to_pad;

            #----------------------end special check---------------

            my $length_1 = total_line_length( $ibeg,      $ipad - 1 );
            my $length_2 = total_line_length( $ibeg_next, $inext_next - 1 );
            $pad_spaces = $length_2 - $length_1;

            # If the first line has a leading ! and the second does
            # not, then remove one space to try to align the next
            # leading characters, which are often the same.  For example:
            #  if (  !$ts
            #      || $ts == $self->Holder
            #      || $self->Holder->Type eq "Arena" )
            #
            # This usually helps readability, but if there are subsequent
            # ! operators things will still get messed up.  For example:
            #
            #  if (  !exists $Net::DNS::typesbyname{$qtype}
            #      && exists $Net::DNS::classesbyname{$qtype}
            #      && !exists $Net::DNS::classesbyname{$qclass}
            #      && exists $Net::DNS::typesbyname{$qclass} )
            # We can't fix that.
            if ($matches_without_bang) { $pad_spaces-- }

            # make sure this won't change if -lp is used
            my $indentation_1 = $leading_spaces_to_go[$ibeg];
            if ( ref($indentation_1) ) {
                if ( $indentation_1->get_RECOVERABLE_SPACES() == 0 ) {
                    my $indentation_2 = $leading_spaces_to_go[$ibeg_next];
                    unless ( $indentation_2->get_RECOVERABLE_SPACES() == 0 ) {
                        $pad_spaces = 0;
                    }
                }
            }

            # we might be able to handle a pad of -1 by removing a blank
            # token
            if ( $pad_spaces < 0 ) {

                if ( $pad_spaces == -1 ) {
                    if ( $ipad > $ibeg && $types_to_go[ $ipad - 1 ] eq 'b' ) {
                        $tokens_to_go[ $ipad - 1 ] = '';
                    }
                }
                $pad_spaces = 0;
            }

            # now apply any padding for alignment
            if ( $ipad >= 0 && $pad_spaces ) {

                my $length_t = total_line_length( $ibeg, $iend );
                if ( $pad_spaces + $length_t <= $rOpts_maximum_line_length ) {
                    $tokens_to_go[$ipad] =
                      ' ' x $pad_spaces . $tokens_to_go[$ipad];
                }
            }
        }
    }
    continue {

        # remember this line's end points and leading-operator flag for the
        # next pass (also runs when an iteration was skipped with 'next')
        $iendm          = $iend;
        $ibegm          = $ibeg;
        $has_leading_op = $has_leading_op_next;
    }    # end of loop over lines
    return;
}

sub correct_lp_indentation {

    # When the -lp option is used, we need to make a last pass through
    # each line to correct the indentation positions in case they differ
    # from the predictions.  This is necessary because perltidy uses a
    # predictor/corrector method for aligning with opening parens.  The
    # predictor is usually good, but sometimes stumbles.  The corrector
    # tries to patch things up once the actual opening paren locations
    # are known.
    my ( $ri_first, $ri_last ) = @_;
    my $do_not_pad = 0;

    #  Note on flag '$do_not_pad':
    #  We want to avoid a situation like this, where the aligner inserts
    #  whitespace before the '=' to align it with a previous '=', because
    #  otherwise the parens might become mis-aligned in a situation like
    #  this, where the '=' has become aligned with the previous line,
    #  pushing the opening '(' forward beyond where we want it.
    #
    #  $mkFloor::currentRoom = '';
    #  $mkFloor::c_entry     = $c->Entry(
    #                                 -width        => '10',
    #                                 -relief       => 'sunken',
    #                                 ...
# ); # # We leave it to the aligner to decide how to do this. # first remove continuation indentation if appropriate my $max_line = @$ri_first - 1; # looking at each line of this batch.. my ( $ibeg, $iend ); my $line; foreach $line ( 0 .. $max_line ) { $ibeg = $$ri_first[$line]; $iend = $$ri_last[$line]; # looking at each token in this output line.. my $i; foreach $i ( $ibeg .. $iend ) { # How many space characters to place before this token # for special alignment. Actual padding is done in the # continue block. # looking for next unvisited indentation item my $indentation = $leading_spaces_to_go[$i]; if ( !$indentation->get_MARKED() ) { $indentation->set_MARKED(1); # looking for indentation item for which we are aligning # with parens, braces, and brackets next unless ( $indentation->get_ALIGN_PAREN() ); # skip closed container on this line if ( $i > $ibeg ) { my $im = $i - 1; if ( $types_to_go[$im] eq 'b' && $im > $ibeg ) { $im-- } if ( $type_sequence_to_go[$im] && $mate_index_to_go[$im] <= $iend ) { next; } } if ( $line == 1 && $i == $ibeg ) { $do_not_pad = 1; } # Ok, let's see what the error is and try to fix it my $actual_pos; my $predicted_pos = $indentation->get_SPACES(); if ( $i > $ibeg ) { # token is mid-line - use length to previous token $actual_pos = total_line_length( $ibeg, $i - 1 ); # for mid-line token, we must check to see if all # additional lines have continuation indentation, # and remove it if so. Otherwise, we do not get # good alignment. 
my $closing_index = $indentation->get_CLOSED(); if ( $closing_index > $iend ) { my $ibeg_next = $$ri_first[ $line + 1 ]; if ( $ci_levels_to_go[$ibeg_next] > 0 ) { undo_lp_ci( $line, $i, $closing_index, $ri_first, $ri_last ); } } } elsif ( $line > 0 ) { # handle case where token starts a new line; # use length of previous line my $ibegm = $$ri_first[ $line - 1 ]; my $iendm = $$ri_last[ $line - 1 ]; $actual_pos = total_line_length( $ibegm, $iendm ); # follow -pt style ++$actual_pos if ( $types_to_go[ $iendm + 1 ] eq 'b' ); } else { # token is first character of first line of batch $actual_pos = $predicted_pos; } my $move_right = $actual_pos - $predicted_pos; # done if no error to correct (gnu2.t) if ( $move_right == 0 ) { $indentation->set_RECOVERABLE_SPACES($move_right); next; } # if we have not seen closure for this indentation in # this batch, we can only pass on a request to the # vertical aligner my $closing_index = $indentation->get_CLOSED(); if ( $closing_index < 0 ) { $indentation->set_RECOVERABLE_SPACES($move_right); next; } # If necessary, look ahead to see if there is really any # leading whitespace dependent on this whitespace, and # also find the longest line using this whitespace. # Since it is always safe to move left if there are no # dependents, we only need to do this if we may have # dependent nodes or need to move right. my $right_margin = 0; my $have_child = $indentation->get_HAVE_CHILD(); my %saw_indentation; my $line_count = 1; $saw_indentation{$indentation} = $indentation; if ( $have_child || $move_right > 0 ) { $have_child = 0; my $max_length = 0; if ( $i == $ibeg ) { $max_length = total_line_length( $ibeg, $iend ); } # look ahead at the rest of the lines of this batch.. my $line_t; foreach $line_t ( $line + 1 .. 
$max_line ) { my $ibeg_t = $$ri_first[$line_t]; my $iend_t = $$ri_last[$line_t]; last if ( $closing_index <= $ibeg_t ); # remember all different indentation objects my $indentation_t = $leading_spaces_to_go[$ibeg_t]; $saw_indentation{$indentation_t} = $indentation_t; $line_count++; # remember longest line in the group my $length_t = total_line_length( $ibeg_t, $iend_t ); if ( $length_t > $max_length ) { $max_length = $length_t; } } $right_margin = $rOpts_maximum_line_length - $max_length; if ( $right_margin < 0 ) { $right_margin = 0 } } my $first_line_comma_count = grep { $_ eq ',' } @types_to_go[ $ibeg .. $iend ]; my $comma_count = $indentation->get_COMMA_COUNT(); my $arrow_count = $indentation->get_ARROW_COUNT(); # This is a simple approximate test for vertical alignment: # if we broke just after an opening paren, brace, bracket, # and there are 2 or more commas in the first line, # and there are no '=>'s, # then we are probably vertically aligned. We could set # an exact flag in sub scan_list, but this is good # enough. my $indentation_count = keys %saw_indentation; my $is_vertically_aligned = ( $i == $ibeg && $first_line_comma_count > 1 && $indentation_count == 1 && ( $arrow_count == 0 || $arrow_count == $line_count ) ); # Make the move if possible .. if ( # we can always move left $move_right < 0 # but we should only move right if we are sure it will # not spoil vertical alignment || ( $comma_count == 0 ) || ( $comma_count > 0 && !$is_vertically_aligned ) ) { my $move = ( $move_right <= $right_margin ) ? $move_right : $right_margin; foreach ( keys %saw_indentation ) { $saw_indentation{$_} ->permanently_decrease_AVAILABLE_SPACES( -$move ); } } # Otherwise, record what we want and the vertical aligner # will try to recover it. 
else { $indentation->set_RECOVERABLE_SPACES($move_right); } } } } return $do_not_pad; } # flush is called to output any tokens in the pipeline, so that # an alternate source of lines can be written in the correct order sub flush { destroy_one_line_block(); output_line_to_go(); Perl::Tidy::VerticalAligner::flush(); } sub reset_block_text_accumulator { # save text after 'if' and 'elsif' to append after 'else' if ($accumulating_text_for_block) { if ( $accumulating_text_for_block =~ /^(if|elsif)$/ ) { push @{$rleading_block_if_elsif_text}, $leading_block_text; } } $accumulating_text_for_block = ""; $leading_block_text = ""; $leading_block_text_level = 0; $leading_block_text_length_exceeded = 0; $leading_block_text_line_number = 0; $leading_block_text_line_length = 0; } sub set_block_text_accumulator { my $i = shift; $accumulating_text_for_block = $tokens_to_go[$i]; if ( $accumulating_text_for_block !~ /^els/ ) { $rleading_block_if_elsif_text = []; } $leading_block_text = ""; $leading_block_text_level = $levels_to_go[$i]; $leading_block_text_line_number = $vertical_aligner_object->get_output_line_number(); $leading_block_text_length_exceeded = 0; # this will contain the column number of the last character # of the closing side comment $leading_block_text_line_length = length($accumulating_text_for_block) + length( $rOpts->{'closing-side-comment-prefix'} ) + $leading_block_text_level * $rOpts_indent_columns + 3; } sub accumulate_block_text { my $i = shift; # accumulate leading text for -csc, ignoring any side comments if ( $accumulating_text_for_block && !$leading_block_text_length_exceeded && $types_to_go[$i] ne '#' ) { my $added_length = length( $tokens_to_go[$i] ); $added_length += 1 if $i == 0; my $new_line_length = $leading_block_text_line_length + $added_length; # we can add this text if we don't exceed some limits.. 
if ( # we must not have already exceeded the text length limit length($leading_block_text) < $rOpts_closing_side_comment_maximum_text # and either: # the new total line length must be below the line length limit # or the new length must be below the text length limit # (ie, we may allow one token to exceed the text length limit) && ( $new_line_length < $rOpts_maximum_line_length || length($leading_block_text) + $added_length < $rOpts_closing_side_comment_maximum_text ) # UNLESS: we are adding a closing paren before the brace we seek. # This is an attempt to avoid situations where the ... to be # added are longer than the omitted right paren, as in: # foreach my $item (@a_rather_long_variable_name_here) { # &whatever; # } ## end foreach my $item (@a_rather_long_variable_name_here... || ( $tokens_to_go[$i] eq ')' && ( ( $i + 1 <= $max_index_to_go && $block_type_to_go[ $i + 1 ] eq $accumulating_text_for_block ) || ( $i + 2 <= $max_index_to_go && $block_type_to_go[ $i + 2 ] eq $accumulating_text_for_block ) ) ) ) { # add an extra space at each newline if ( $i == 0 ) { $leading_block_text .= ' ' } # add the token text $leading_block_text .= $tokens_to_go[$i]; $leading_block_text_line_length = $new_line_length; } # show that text was truncated if necessary elsif ( $types_to_go[$i] ne 'b' ) { $leading_block_text_length_exceeded = 1; $leading_block_text .= '...'; } } } { my %is_if_elsif_else_unless_while_until_for_foreach; BEGIN { # These block types may have text between the keyword and opening # curly. Note: 'else' does not, but must be included to allow trailing # if/elsif text to be appended. # patch for SWITCH/CASE: added 'case' and 'when' @_ = qw(if elsif else unless while until for foreach case when); @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_); } sub accumulate_csc_text { # called once per output buffer when -csc is used. Accumulates # the text placed after certain closing block braces. 
# Defines and returns the following for this buffer: my $block_leading_text = ""; # the leading text of the last '}' my $rblock_leading_if_elsif_text; my $i_block_leading_text = -1; # index of token owning block_leading_text my $block_line_count = 100; # how many lines the block spans my $terminal_type = 'b'; # type of last nonblank token my $i_terminal = 0; # index of last nonblank token my $terminal_block_type = ""; for my $i ( 0 .. $max_index_to_go ) { my $type = $types_to_go[$i]; my $block_type = $block_type_to_go[$i]; my $token = $tokens_to_go[$i]; # remember last nonblank token type if ( $type ne '#' && $type ne 'b' ) { $terminal_type = $type; $terminal_block_type = $block_type; $i_terminal = $i; } my $type_sequence = $type_sequence_to_go[$i]; if ( $block_type && $type_sequence ) { if ( $token eq '}' ) { # restore any leading text saved when we entered this block if ( defined( $block_leading_text{$type_sequence} ) ) { ( $block_leading_text, $rblock_leading_if_elsif_text ) = @{ $block_leading_text{$type_sequence} }; $i_block_leading_text = $i; delete $block_leading_text{$type_sequence}; $rleading_block_if_elsif_text = $rblock_leading_if_elsif_text; } # if we run into a '}' then we probably started accumulating # at something like a trailing 'if' clause..no harm done. if ( $accumulating_text_for_block && $levels_to_go[$i] <= $leading_block_text_level ) { my $lev = $levels_to_go[$i]; reset_block_text_accumulator(); } if ( defined( $block_opening_line_number{$type_sequence} ) ) { my $output_line_number = $vertical_aligner_object->get_output_line_number(); $block_line_count = $output_line_number - $block_opening_line_number{$type_sequence} + 1; delete $block_opening_line_number{$type_sequence}; } else { # Error: block opening line undefined for this line.. # This shouldn't be possible, but it is not a # significant problem. 
} } elsif ( $token eq '{' ) { my $line_number = $vertical_aligner_object->get_output_line_number(); $block_opening_line_number{$type_sequence} = $line_number; if ( $accumulating_text_for_block && $levels_to_go[$i] == $leading_block_text_level ) { if ( $accumulating_text_for_block eq $block_type ) { # save any leading text before we enter this block $block_leading_text{$type_sequence} = [ $leading_block_text, $rleading_block_if_elsif_text ]; $block_opening_line_number{$type_sequence} = $leading_block_text_line_number; reset_block_text_accumulator(); } else { # shouldn't happen, but not a serious error. # We were accumulating -csc text for block type # $accumulating_text_for_block and unexpectedly # encountered a '{' for block type $block_type. } } } } if ( $type eq 'k' && $csc_new_statement_ok && $is_if_elsif_else_unless_while_until_for_foreach{$token} && $token =~ /$closing_side_comment_list_pattern/o ) { set_block_text_accumulator($i); } else { # note: ignoring type 'q' because of tricks being played # with 'q' for hanging side comments if ( $type ne 'b' && $type ne '#' && $type ne 'q' ) { $csc_new_statement_ok = ( $block_type || $type eq 'J' || $type eq ';' ); } if ( $type eq ';' && $accumulating_text_for_block && $levels_to_go[$i] == $leading_block_text_level ) { reset_block_text_accumulator(); } else { accumulate_block_text($i); } } } # Treat an 'else' block specially by adding preceding 'if' and # 'elsif' text. Otherwise, the 'end else' is not helpful, # especially for cuddled-else formatting. 
if ( $terminal_block_type =~ /^els/ && $rblock_leading_if_elsif_text ) { $block_leading_text = make_else_csc_text( $i_terminal, $terminal_block_type, $block_leading_text, $rblock_leading_if_elsif_text ); } return ( $terminal_type, $i_terminal, $i_block_leading_text, $block_leading_text, $block_line_count ); } } sub make_else_csc_text { # create additional -csc text for an 'else' and optionally 'elsif', # depending on the value of switch # $rOpts_closing_side_comment_else_flag: # # = 0 add 'if' text to trailing else # = 1 same as 0 plus: # add 'if' to 'elsif's if can fit in line length # add last 'elsif' to trailing else if can fit in one line # = 2 same as 1 but do not check if exceed line length # # $rif_elsif_text = a reference to a list of all previous closing # side comments created for this if block # my ( $i_terminal, $block_type, $block_leading_text, $rif_elsif_text ) = @_; my $csc_text = $block_leading_text; if ( $block_type eq 'elsif' && $rOpts_closing_side_comment_else_flag == 0 ) { return $csc_text; } my $count = @{$rif_elsif_text}; return $csc_text unless ($count); my $if_text = '[ if' . $rif_elsif_text->[0]; # always show the leading 'if' text on 'else' if ( $block_type eq 'else' ) { $csc_text .= $if_text; } # see if that's all if ( $rOpts_closing_side_comment_else_flag == 0 ) { return $csc_text; } my $last_elsif_text = ""; if ( $count > 1 ) { $last_elsif_text = ' [elsif' . $rif_elsif_text->[ $count - 1 ]; if ( $count > 2 ) { $last_elsif_text = ' [...' . $last_elsif_text; } } # tentatively append one more item my $saved_text = $csc_text; if ( $block_type eq 'else' ) { $csc_text .= $last_elsif_text; } else { $csc_text .= ' ' . 
$if_text; } # all done if no length checks requested if ( $rOpts_closing_side_comment_else_flag == 2 ) { return $csc_text; } # undo it if line length exceeded my $length = length($csc_text) + length($block_type) + length( $rOpts->{'closing-side-comment-prefix'} ) + $levels_to_go[$i_terminal] * $rOpts_indent_columns + 3; if ( $length > $rOpts_maximum_line_length ) { $csc_text = $saved_text; } return $csc_text; } { # sub balance_csc_text my %matching_char; BEGIN { %matching_char = ( '{' => '}', '(' => ')', '[' => ']', '}' => '{', ')' => '(', ']' => '[', ); } sub balance_csc_text { # Append characters to balance a closing side comment so that editors # such as vim can correctly jump through code. # Simple Example: # input = ## end foreach my $foo ( sort { $b ... # output = ## end foreach my $foo ( sort { $b ...}) # NOTE: This routine does not currently filter out structures within # quoted text because the bounce algorithims in text editors do not # necessarily do this either (a version of vim was checked and # did not do this). # Some complex examples which will cause trouble for some editors: # while ( $mask_string =~ /\{[^{]*?\}/g ) { # if ( $mask_str =~ /\}\s*els[^\{\}]+\{$/ ) { # if ( $1 eq '{' ) { # test file test1/braces.pl has many such examples. my ($csc) = @_; # loop to examine characters one-by-one, RIGHT to LEFT and # build a balancing ending, LEFT to RIGHT. for ( my $pos = length($csc) - 1 ; $pos >= 0 ; $pos-- ) { my $char = substr( $csc, $pos, 1 ); # ignore everything except structural characters next unless ( $matching_char{$char} ); # pop most recently appended character my $top = chop($csc); # push it back plus the mate to the newest character # unless they balance each other. $csc = $csc . $top . 
$matching_char{$char} unless $top eq $char; } # return the balanced string return $csc; } } sub add_closing_side_comment { # add closing side comments after closing block braces if -csc used my $cscw_block_comment; #--------------------------------------------------------------- # Step 1: loop through all tokens of this line to accumulate # the text needed to create the closing side comments. Also see # how the line ends. #--------------------------------------------------------------- my ( $terminal_type, $i_terminal, $i_block_leading_text, $block_leading_text, $block_line_count ) = accumulate_csc_text(); #--------------------------------------------------------------- # Step 2: make the closing side comment if this ends a block #--------------------------------------------------------------- my $have_side_comment = $i_terminal != $max_index_to_go; # if this line might end in a block closure.. if ( $terminal_type eq '}' # ..and either && ( # the block is long enough ( $block_line_count >= $rOpts->{'closing-side-comment-interval'} ) # or there is an existing comment to check || ( $have_side_comment && $rOpts->{'closing-side-comment-warnings'} ) ) # .. and if this is one of the types of interest && $block_type_to_go[$i_terminal] =~ /$closing_side_comment_list_pattern/o # .. but not an anonymous sub # These are not normally of interest, and their closing braces are # often followed by commas or semicolons anyway. This also avoids # possible erratic output due to line numbering inconsistencies # in the cases where their closing braces terminate a line. 
&& $block_type_to_go[$i_terminal] ne 'sub' # ..and the corresponding opening brace must is not in this batch # (because we do not need to tag one-line blocks, although this # should also be caught with a positive -csci value) && $mate_index_to_go[$i_terminal] < 0 # ..and either && ( # this is the last token (line doesnt have a side comment) !$have_side_comment # or the old side comment is a closing side comment || $tokens_to_go[$max_index_to_go] =~ /$closing_side_comment_prefix_pattern/o ) ) { # then make the closing side comment text my $token = "$rOpts->{'closing-side-comment-prefix'} $block_type_to_go[$i_terminal]"; # append any extra descriptive text collected above if ( $i_block_leading_text == $i_terminal ) { $token .= $block_leading_text; } $token = balance_csc_text($token) if $rOpts->{'closing-side-comments-balanced'}; $token =~ s/\s*$//; # trim any trailing whitespace # handle case of existing closing side comment if ($have_side_comment) { # warn if requested and tokens differ significantly if ( $rOpts->{'closing-side-comment-warnings'} ) { my $old_csc = $tokens_to_go[$max_index_to_go]; my $new_csc = $token; $new_csc =~ s/\s+//g; # trim all whitespace $old_csc =~ s/\s+//g; # trim all whitespace $new_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures $old_csc =~ s/[\]\)\}\s]*$//; # trim trailing structures $new_csc =~ s/(\.\.\.)$//; # trim trailing '...' my $new_trailing_dots = $1; $old_csc =~ s/(\.\.\.)\s*$//; # trim trailing '...' # Patch to handle multiple closing side comments at # else and elsif's. These have become too complicated # to check, so if we see an indication of # '[ if' or '[ # elsif', then assume they were made # by perltidy. if ( $block_type_to_go[$i_terminal] eq 'else' ) { if ( $old_csc =~ /\[\s*elsif/ ) { $old_csc = $new_csc } } elsif ( $block_type_to_go[$i_terminal] eq 'elsif' ) { if ( $old_csc =~ /\[\s*if/ ) { $old_csc = $new_csc } } # if old comment is contained in new comment, # only compare the common part. 
if ( length($new_csc) > length($old_csc) ) { $new_csc = substr( $new_csc, 0, length($old_csc) ); } # if the new comment is shorter and has been limited, # only compare the common part. if ( length($new_csc) < length($old_csc) && $new_trailing_dots ) { $old_csc = substr( $old_csc, 0, length($new_csc) ); } # any remaining difference? if ( $new_csc ne $old_csc ) { # just leave the old comment if we are below the threshold # for creating side comments if ( $block_line_count < $rOpts->{'closing-side-comment-interval'} ) { $token = undef; } # otherwise we'll make a note of it else { warning( "perltidy -cscw replaced: $tokens_to_go[$max_index_to_go]\n" ); # save the old side comment in a new trailing block comment my ( $day, $month, $year ) = (localtime)[ 3, 4, 5 ]; $year += 1900; $month += 1; $cscw_block_comment = "## perltidy -cscw $year-$month-$day: $tokens_to_go[$max_index_to_go]"; } } else { # No differences.. we can safely delete old comment if we # are below the threshold if ( $block_line_count < $rOpts->{'closing-side-comment-interval'} ) { $token = undef; unstore_token_to_go() if ( $types_to_go[$max_index_to_go] eq '#' ); unstore_token_to_go() if ( $types_to_go[$max_index_to_go] eq 'b' ); } } } # switch to the new csc (unless we deleted it!) 
$tokens_to_go[$max_index_to_go] = $token if $token; } # handle case of NO existing closing side comment else { # insert the new side comment into the output token stream my $type = '#'; my $block_type = ''; my $type_sequence = ''; my $container_environment = $container_environment_to_go[$max_index_to_go]; my $level = $levels_to_go[$max_index_to_go]; my $slevel = $nesting_depth_to_go[$max_index_to_go]; my $no_internal_newlines = 0; my $nesting_blocks = $nesting_blocks_to_go[$max_index_to_go]; my $ci_level = $ci_levels_to_go[$max_index_to_go]; my $in_continued_quote = 0; # first insert a blank token insert_new_token_to_go( ' ', 'b', $slevel, $no_internal_newlines ); # then the side comment insert_new_token_to_go( $token, $type, $slevel, $no_internal_newlines ); } } return $cscw_block_comment; } sub previous_nonblank_token { my ($i) = @_; my $name = ""; my $im = $i - 1; return "" if ( $im < 0 ); if ( $types_to_go[$im] eq 'b' ) { $im--; } return "" if ( $im < 0 ); $name = $tokens_to_go[$im]; # prepend any sub name to an isolated -> to avoid unwanted alignments # [test case is test8/penco.pl] if ( $name eq '->' ) { $im--; if ( $im >= 0 && $types_to_go[$im] ne 'b' ) { $name = $tokens_to_go[$im] . $name; } } return $name; } sub send_lines_to_vertical_aligner { my ( $ri_first, $ri_last, $do_not_pad ) = @_; my $rindentation_list = [0]; # ref to indentations for each line # define the array @matching_token_to_go for the output tokens # which will be non-blank for each special token (such as =>) # for which alignment is required. 
set_vertical_alignment_markers( $ri_first, $ri_last ); # flush if necessary to avoid unwanted alignment my $must_flush = 0; if ( @$ri_first > 1 ) { # flush before a long if statement if ( $types_to_go[0] eq 'k' && $tokens_to_go[0] =~ /^(if|unless)$/ ) { $must_flush = 1; } } if ($must_flush) { Perl::Tidy::VerticalAligner::flush(); } undo_ci( $ri_first, $ri_last ); set_logical_padding( $ri_first, $ri_last ); # loop to prepare each line for shipment my $n_last_line = @$ri_first - 1; my $in_comma_list; for my $n ( 0 .. $n_last_line ) { my $ibeg = $$ri_first[$n]; my $iend = $$ri_last[$n]; my ( $rtokens, $rfields, $rpatterns ) = make_alignment_patterns( $ibeg, $iend ); my ( $indentation, $lev, $level_end, $terminal_type, $is_semicolon_terminated, $is_outdented_line ) = set_adjusted_indentation( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last, $rindentation_list ); # we will allow outdenting of long lines.. my $outdent_long_lines = ( # which are long quotes, if allowed ( $types_to_go[$ibeg] eq 'Q' && $rOpts->{'outdent-long-quotes'} ) # which are long block comments, if allowed || ( $types_to_go[$ibeg] eq '#' && $rOpts->{'outdent-long-comments'} # but not if this is a static block comment && !$is_static_block_comment ) ); my $level_jump = $nesting_depth_to_go[ $iend + 1 ] - $nesting_depth_to_go[$ibeg]; my $rvertical_tightness_flags = set_vertical_tightness_flags( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ); # flush an outdented line to avoid any unwanted vertical alignment Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); my $is_terminal_ternary = 0; if ( $tokens_to_go[$ibeg] eq ':' || $n > 0 && $tokens_to_go[ $$ri_last[ $n - 1 ] ] eq ':' ) { if ( ( $terminal_type eq ';' && $level_end <= $lev ) || ( $level_end < $lev ) ) { $is_terminal_ternary = 1; } } # send this new line down the pipe my $forced_breakpoint = $forced_breakpoint_to_go[$iend]; Perl::Tidy::VerticalAligner::append_line( $lev, $level_end, $indentation, $rfields, $rtokens, 
$rpatterns, $forced_breakpoint_to_go[$iend] || $in_comma_list, $outdent_long_lines, $is_terminal_ternary, $is_semicolon_terminated, $do_not_pad, $rvertical_tightness_flags, $level_jump, ); $in_comma_list = $tokens_to_go[$iend] eq ',' && $forced_breakpoint_to_go[$iend]; # flush an outdented line to avoid any unwanted vertical alignment Perl::Tidy::VerticalAligner::flush() if ($is_outdented_line); $do_not_pad = 0; } # end of loop to output each line # remember indentation of lines containing opening containers for # later use by sub set_adjusted_indentation save_opening_indentation( $ri_first, $ri_last, $rindentation_list ); } { # begin make_alignment_patterns my %block_type_map; my %keyword_map; BEGIN { # map related block names into a common name to # allow alignment %block_type_map = ( 'unless' => 'if', 'else' => 'if', 'elsif' => 'if', 'when' => 'if', 'default' => 'if', 'case' => 'if', 'sort' => 'map', 'grep' => 'map', ); # map certain keywords to the same 'if' class to align # long if/elsif sequences. [elsif.pl] %keyword_map = ( 'unless' => 'if', 'else' => 'if', 'elsif' => 'if', 'when' => 'given', 'default' => 'given', 'case' => 'switch', # treat an 'undef' similar to numbers and quotes 'undef' => 'Q', ); } sub make_alignment_patterns { # Here we do some important preliminary work for the # vertical aligner. We create three arrays for one # output line. These arrays contain strings that can # be tested by the vertical aligner to see if # consecutive lines can be aligned vertically. # # The three arrays are indexed on the vertical # alignment fields and are: # @tokens - a list of any vertical alignment tokens for this line. # These are tokens, such as '=' '&&' '#' etc which # we want to might align vertically. These are # decorated with various information such as # nesting depth to prevent unwanted vertical # alignment matches. # @fields - the actual text of the line between the vertical alignment # tokens. 
# @patterns - a modified list of token types, one for each alignment # field. These should normally each match before alignment is # allowed, even when the alignment tokens match. my ( $ibeg, $iend ) = @_; my @tokens = (); my @fields = (); my @patterns = (); my $i_start = $ibeg; my $i; my $depth = 0; my @container_name = (""); my @multiple_comma_arrows = (undef); my $j = 0; # field index $patterns[0] = ""; for $i ( $ibeg .. $iend ) { # Keep track of containers balanced on this line only. # These are used below to prevent unwanted cross-line alignments. # Unbalanced containers already avoid aligning across # container boundaries. if ( $tokens_to_go[$i] eq '(' ) { # if container is balanced on this line... my $i_mate = $mate_index_to_go[$i]; if ( $i_mate > $i && $i_mate <= $iend ) { $depth++; my $seqno = $type_sequence_to_go[$i]; my $count = comma_arrow_count($seqno); $multiple_comma_arrows[$depth] = $count && $count > 1; # Append the previous token name to make the container name # more unique. This name will also be given to any commas # within this container, and it helps avoid undesirable # alignments of different types of containers. my $name = previous_nonblank_token($i); $name =~ s/^->//; $container_name[$depth] = "+" . $name; # Make the container name even more unique if necessary. # If we are not vertically aligning this opening paren, # append a character count to avoid bad alignment because # it usually looks bad to align commas within continers # for which the opening parens do not align. 
Here # is an example very BAD alignment of commas (because # the atan2 functions are not all aligned): # $XY = # $X * $RTYSQP1 * atan2( $X, $RTYSQP1 ) + # $Y * $RTXSQP1 * atan2( $Y, $RTXSQP1 ) - # $X * atan2( $X, 1 ) - # $Y * atan2( $Y, 1 ); # # On the other hand, it is usually okay to align commas if # opening parens align, such as: # glVertex3d( $cx + $s * $xs, $cy, $z ); # glVertex3d( $cx, $cy + $s * $ys, $z ); # glVertex3d( $cx - $s * $xs, $cy, $z ); # glVertex3d( $cx, $cy - $s * $ys, $z ); # # To distinguish between these situations, we will # append the length of the line from the previous matching # token, or beginning of line, to the function name. This # will allow the vertical aligner to reject undesirable # matches. # if we are not aligning on this paren... if ( $matching_token_to_go[$i] eq '' ) { # Sum length from previous alignment, or start of line. # Note that we have to sum token lengths here because # padding has been done and so array $lengths_to_go # is now wrong. my $len = length( join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); $len += leading_spaces_to_go($i_start) if ( $i_start == $ibeg ); # tack length onto the container name to make unique $container_name[$depth] .= "-" . $len; } } } elsif ( $tokens_to_go[$i] eq ')' ) { $depth-- if $depth > 0; } # if we find a new synchronization token, we are done with # a field if ( $i > $i_start && $matching_token_to_go[$i] ne '' ) { my $tok = my $raw_tok = $matching_token_to_go[$i]; # make separators in different nesting depths unique # by appending the nesting depth digit. if ( $raw_tok ne '#' ) { $tok .= "$nesting_depth_to_go[$i]"; } # also decorate commas with any container name to avoid # unwanted cross-line alignments. if ( $raw_tok eq ',' || $raw_tok eq '=>' ) { if ( $container_name[$depth] ) { $tok .= $container_name[$depth]; } } # Patch to avoid aligning leading and trailing if, unless. # Mark trailing if, unless statements with container names. 
# This makes them different from leading if, unless which # are not so marked at present. If we ever need to name # them too, we could use ci to distinguish them. # Example problem to avoid: # return ( 2, "DBERROR" ) # if ( $retval == 2 ); # if ( scalar @_ ) { # my ( $a, $b, $c, $d, $e, $f ) = @_; # } if ( $raw_tok eq '(' ) { my $ci = $ci_levels_to_go[$ibeg]; if ( $container_name[$depth] =~ /^\+(if|unless)/ && $ci ) { $tok .= $container_name[$depth]; } } # Decorate block braces with block types to avoid # unwanted alignments such as the following: # foreach ( @{$routput_array} ) { $fh->print($_) } # eval { $fh->close() }; if ( $raw_tok eq '{' && $block_type_to_go[$i] ) { my $block_type = $block_type_to_go[$i]; # map certain related block types to allow # else blocks to align $block_type = $block_type_map{$block_type} if ( defined( $block_type_map{$block_type} ) ); # remove sub names to allow one-line sub braces to align # regardless of name if ( $block_type =~ /^sub / ) { $block_type = 'sub' } # allow all control-type blocks to align if ( $block_type =~ /^[A-Z]+$/ ) { $block_type = 'BEGIN' } $tok .= $block_type; } # concatenate the text of the consecutive tokens to form # the field push( @fields, join( '', @tokens_to_go[ $i_start .. $i - 1 ] ) ); # store the alignment token for this field push( @tokens, $tok ); # get ready for the next batch $i_start = $i; $j++; $patterns[$j] = ""; } # continue accumulating tokens # handle non-keywords.. if ( $types_to_go[$i] ne 'k' ) { my $type = $types_to_go[$i]; # Mark most things before arrows as a quote to # get them to line up. Testfile: mixed.pl. if ( ( $i < $iend - 1 ) && ( $type =~ /^[wnC]$/ ) ) { my $next_type = $types_to_go[ $i + 1 ]; my $i_next_nonblank = ( ( $next_type eq 'b' ) ? 
$i + 2 : $i + 1 );
                    if ( $types_to_go[$i_next_nonblank] eq '=>' ) {
                        $type = 'Q';

                        # Patch to ignore leading minus before words,
                        # by changing pattern 'mQ' into just 'Q',
                        # so that we can align things like this:
                        #  Button   => "Print letter \"~$_\"",
                        #  -command => [ sub { print "$_[0]\n" }, $_ ],
                        if ( $patterns[$j] eq 'm' ) { $patterns[$j] = "" }
                    }
                }

                # patch to make numbers and quotes align
                if ( $type eq 'n' ) { $type = 'Q' }

                # patch to ignore any ! in patterns
                if ( $type eq '!' ) { $type = '' }

                $patterns[$j] .= $type;
            }

            # for keywords we have to use the actual text
            else {
                my $tok = $tokens_to_go[$i];

                # but map certain keywords to a common string to allow
                # alignment.
                $tok = $keyword_map{$tok}
                  if ( defined( $keyword_map{$tok} ) );
                $patterns[$j] .= $tok;
            }
        }

        # done with this line .. join text of tokens to make the last field
        push( @fields, join( '', @tokens_to_go[ $i_start .. $iend ] ) );
        return ( \@tokens, \@fields, \@patterns );
    }

}    # end make_alignment_patterns

{    # begin unmatched_indexes

    # Closure holding the bookkeeping for containers which are opened or
    # closed in the current batch without a partner in the same batch.
    # These variables are shared by all of the routines in this block:
    my @unmatched_opening_indexes_in_this_batch;
    my @unmatched_closing_indexes_in_this_batch;
    my %comma_arrow_count;

    sub is_unbalanced_batch {

        # Returns the total number of unmatched opening and closing
        # tokens recorded for this batch (zero if the batch is balanced).
        return scalar(@unmatched_opening_indexes_in_this_batch) +
          scalar(@unmatched_closing_indexes_in_this_batch);
    }

    sub comma_arrow_count {

        # Returns the number of '=>' tokens counted for the container
        # with the given sequence number (undef if none were counted).
        my ($seqno) = @_;
        return $comma_arrow_count{$seqno};
    }

    sub match_opening_and_closing_tokens {

        # Pair up the indexes of opening and closing braces, brackets,
        # parens and ternary tokens in this batch.  This must wait until
        # all tokens of the batch are stored because unstoring of tokens
        # would otherwise cause trouble.

        # start each batch with empty bookkeeping
        @unmatched_opening_indexes_in_this_batch = ();
        @unmatched_closing_indexes_in_this_batch = ();
        %comma_arrow_count                       = ();

        for my $idx ( 0 .. $max_index_to_go ) {

            # A token without a sequence number is only of interest if it
            # is a '=>': count it for the innermost open container.
            if ( !$type_sequence_to_go[$idx] ) {
                if (   $tokens_to_go[$idx] eq '=>'
                    && @unmatched_opening_indexes_in_this_batch )
                {
                    my $i_container =
                      $unmatched_opening_indexes_in_this_batch[-1];
                    $comma_arrow_count{ $type_sequence_to_go[$i_container] }++;
                }
                next;
            }

            my $tok = $tokens_to_go[$idx];

            # opening token: push onto the stack of unmatched openers
            if ( $tok =~ /^[\(\[\{\?]$/ ) {
                push @unmatched_opening_indexes_in_this_batch, $idx;
                next;
            }

            # anything else with a sequence number must be a closer
            next unless ( $tok =~ /^[\)\]\}\:]$/ );

            my $i_opener = pop @unmatched_opening_indexes_in_this_batch;

            # no opener available in this batch
            if ( !defined($i_opener) || $i_opener < 0 ) {
                push @unmatched_closing_indexes_in_this_batch, $idx;
                next;
            }

            # matching sequence numbers: record the two mates
            if ( $type_sequence_to_go[$i_opener] == $type_sequence_to_go[$idx] )
            {
                $mate_index_to_go[$idx]      = $i_opener;
                $mate_index_to_go[$i_opener] = $idx;
                next;
            }

            # mismatch: put the opener back and note the stray closer
            push @unmatched_opening_indexes_in_this_batch, $i_opener;
            push @unmatched_closing_indexes_in_this_batch, $idx;
        }
    }

    sub save_opening_indentation {

        # To be called after each batch of tokens is output.  It records
        # the line indentation of every unmatched opening token so that
        # sub get_opening_indentation can find it from a later batch.

        my ( $ri_first, $ri_last, $rindentation_list ) = @_;

        # A saved indentation whose closing token was unmatched in this
        # batch has now been used and will never be needed again, so drop
        # it to keep the hash size down.
        for my $i_closing (@unmatched_closing_indexes_in_this_batch) {
            my $seqno = $type_sequence_to_go[$i_closing];
            delete $saved_opening_indentation{$seqno};
        }

        # An unmatched opening token of this batch may be closed in a
        # subsequent batch, so keep its indentation data.
        for my $i_opening (@unmatched_opening_indexes_in_this_batch) {
            my $seqno = $type_sequence_to_go[$i_opening];
            my @saved = lookup_opening_indentation( $i_opening, $ri_first,
                $ri_last, $rindentation_list );
            $saved_opening_indentation{$seqno} = [@saved];
        }
    }
}    # end unmatched_indexes

sub get_opening_indentation {

    # Get the indentation of the line which output the opening token
    # corresponding to a given closing token in the current output batch.
    #
    # given:
    # $i_closing - index in this line of a closing token ')' '}' or ']'
    #
    # $ri_first - reference to list of the first index $i for each output
    #               line in this batch
    # $ri_last - reference to list of the last index $i for each output line
    #              in this batch
    # $rindentation_list - reference to a list containing the indentation
    #            used for each line.
    #
    # return:
    #   -the indentation of the line which contained the opening token
    #    which matches the token at index $i_closing
    #   -its offset (number of columns) from the start of the line
    #   -a flag which is true if the opening token began its line
    #   -a flag which is true if the opening token data was found
    #
    my ( $i_closing, $ri_first, $ri_last, $rindentation_list ) = @_;

    # Case 1: the opening token is in the current batch; look it up
    my $i_opening = $mate_index_to_go[$i_closing];
    if ( $i_opening >= 0 ) {
        my ( $indent, $offset, $is_leading ) =
          lookup_opening_indentation( $i_opening, $ri_first, $ri_last,
            $rindentation_list );
        return ( $indent, $offset, $is_leading, 1 );
    }

    # Case 2: it should have been stored in the hash by a previous batch
    my $seqno = $type_sequence_to_go[$i_closing];
    if ( $seqno && $saved_opening_indentation{$seqno} ) {
        my ( $indent, $offset, $is_leading ) =
          @{ $saved_opening_indentation{$seqno} };
        return ( $indent, $offset, $is_leading, 1 );
    }

    # Case 3: nothing known.  Either there is no sequence number (an
    # unbalanced container) or some kind of serious error occurred
    # (example is badfile.t).
    return ( 0, 0, 0, 0 );
}

sub lookup_opening_indentation {

    # Get the indentation of the line in the current output batch
    # which output a selected opening token.
    #
    # given:
    #   $i_opening - index of an opening token in the current output batch
    #                whose line indentation we need
    #   $ri_first - reference to list of the first index $i for each output
    #               line in this batch
    #   $ri_last - reference to list of the last index $i for each output line
    #              in this batch
    #   $rindentation_list - reference to a list containing the indentation
    #            used for each line.  (NOTE: the first slot in
    #            this list is the last returned line number, and this is
    #            followed by the list of indentations).
    #
    # return:
    #   -the indentation of the line which contained token $i_opening
    #   -its offset (number of columns) from the start of the line
    #   -a flag which is true if $i_opening is the first token of its line

    my ( $i_opening, $ri_first, $ri_last, $rindentation_list ) = @_;

    # Resume at the line found by the previous lookup, but start over
    # from the first line if the token lies before that line.
    my $line_index = $rindentation_list->[0];
    if ( $i_opening < $ri_first->[$line_index] ) { $line_index = 0 }

    # error - token index is out of bounds - shouldn't happen
    if ( $i_opening > $ri_last->[-1] ) {
        warning(
"non-fatal program bug in lookup_opening_indentation - index out of range\n"
        );
        report_definite_bug();
        $line_index = $#{$ri_last};
    }

    # normal case: step forward to the line holding the token
    else {
        $line_index++ while ( $i_opening > $ri_last->[$line_index] );
    }

    # save line number to start looking next call
    $rindentation_list->[0] = $line_index;

    my $i_line_start = $ri_first->[$line_index];
    my $offset = token_sequence_length( $i_line_start, $i_opening ) - 1;
    my $is_leading = ( $i_line_start == $i_opening );
    return ( $rindentation_list->[ $line_index + 1 ], $offset, $is_leading );
}

{
    my %is_if_elsif_else_unless_while_until_for_foreach;

    BEGIN {

        # These block types may have text between the keyword and opening
        # curly.  Note: 'else' does not, but must be included to allow trailing
        # if/elsif text to be appended.
        # patch for SWITCH/CASE: added 'case' and 'when'
        @_ = qw(if elsif else unless while until for foreach case when);
        @is_if_elsif_else_unless_while_until_for_foreach{@_} = (1) x scalar(@_);
    }

    sub set_adjusted_indentation {

        # This routine has the final say regarding the actual indentation of
        # a line.  It starts with the basic indentation which has been
        # defined for the leading token, and then takes into account any
        # options that the user has set regarding special indenting and
        # outdenting.
my ( $ibeg, $iend, $rfields, $rpatterns, $ri_first, $ri_last,
            $rindentation_list )
          = @_;

        # Arguments:
        #   $ibeg, $iend - indexes of the first and last tokens of this line
        #   $rfields, $rpatterns - references to the alignment fields and
        #       patterns of this line; only element [0] of each is examined
        #   $ri_first, $ri_last - passed on to get_opening_indentation
        #   $rindentation_list - passed on to get_opening_indentation; the
        #       indentation chosen for this line is also appended to it
        #
        # Returns the list:
        #   ( $indentation, $lev, $level_end, $terminal_type,
        #     $is_semicolon_terminated, $is_outdented_line )

        # we need to know the last token of this line
        my ( $terminal_type, $i_terminal ) =
          terminal_type( \@types_to_go, \@block_type_to_go, $ibeg, $iend );

        my $is_outdented_line = 0;

        # true if the line ends in a ';' at a shallower nesting depth than
        # the depth at which the line begins
        my $is_semicolon_terminated = $terminal_type eq ';'
          && $nesting_depth_to_go[$iend] < $nesting_depth_to_go[$ibeg];

        ##########################################################
        # Section 1: set a flag and a default indentation
        #
        # Most lines are indented according to the initial token.
        # But it is common to outdent to the level just after the
        # terminal token in certain cases...
        # adjust_indentation flag:
        #       0 - do not adjust
        #       1 - outdent
        #       2 - vertically align with opening token
        #       3 - indent
        ##########################################################
        my $adjust_indentation         = 0;
        my $default_adjust_indentation = $adjust_indentation;

        my ( $opening_indentation, $opening_offset, $is_leading,
            $opening_exists );

        # if we are at a closing token of some type..
        if ( $types_to_go[$ibeg] =~ /^[\)\}\]]$/ ) {

            # get the indentation of the line containing the corresponding
            # opening token
            (
                $opening_indentation, $opening_offset,
                $is_leading,          $opening_exists
              )
              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
                $rindentation_list );

            # First set the default behavior:
            # default behavior is to outdent closing lines
            # of the form:   ");  };  ];  )->xxx;"
            if (
                $is_semicolon_terminated

                # and 'cuddled parens' of the form:   ")->pack("
                || (
                       $terminal_type eq '('
                    && $types_to_go[$ibeg] eq ')'
                    && ( $nesting_depth_to_go[$iend] + 1 ==
                        $nesting_depth_to_go[$ibeg] )
                )
              )
            {
                $adjust_indentation = 1;
            }

            # TESTING: outdent something like '),'
            if (
                $terminal_type eq ','

                # allow just one character before the comma
                && $i_terminal == $ibeg + 1

                # require LIST environment; otherwise, we may outdent too much --
                # this can happen in calls without parentheses (overload.t);
                && $container_environment_to_go[$i_terminal] eq 'LIST'
              )
            {
                $adjust_indentation = 1;
            }

            # undo continuation indentation of a terminal closing token if
            # it is the last token before a level decrease.  This will allow
            # a closing token to line up with its opening counterpart, and
            # avoids an indentation jump larger than 1 level.
            if (   $types_to_go[$i_terminal] =~ /^[\}\]\)R]$/
                && $i_terminal == $ibeg )
            {

                # NOTE(review): $ci is assigned here but never read within
                # this block.
                my $ci        = $ci_levels_to_go[$ibeg];
                my $lev       = $levels_to_go[$ibeg];
                my $next_type = $types_to_go[ $ibeg + 1 ];
                my $i_next_nonblank =
                  ( ( $next_type eq 'b' ) ? $ibeg + 2 : $ibeg + 1 );
                if (   $i_next_nonblank <= $max_index_to_go
                    && $levels_to_go[$i_next_nonblank] < $lev )
                {
                    $adjust_indentation = 1;
                }
            }

            # YVES patch 1 of 2:
            # Undo ci of line with leading closing eval brace,
            # but not beyond the indentation of the line with
            # the opening brace.
            if (   $block_type_to_go[$ibeg] eq 'eval'
                && !$rOpts->{'line-up-parentheses'}
                && !$rOpts->{'indent-closing-brace'} )
            {
                (
                    $opening_indentation, $opening_offset,
                    $is_leading,          $opening_exists
                  )
                  = get_opening_indentation( $ibeg, $ri_first, $ri_last,
                    $rindentation_list );

                # this $indentation is local to this block; the value
                # returned by this sub is declared in Section 2
                my $indentation = $leading_spaces_to_go[$ibeg];
                if ( defined($opening_indentation)
                    && $indentation > $opening_indentation )
                {
                    $adjust_indentation = 1;
                }
            }

            # remember the default so that Section 2 can fall back to it
            $default_adjust_indentation = $adjust_indentation;

            # Now modify default behavior according to user request:
            # handle option to indent non-blocks of the form );  };  ];
            # But don't do special indentation to something like ')->pack('
            if ( !$block_type_to_go[$ibeg] ) {
                my $cti = $closing_token_indentation{ $tokens_to_go[$ibeg] };
                if ( $cti == 1 ) {
                    if (   $i_terminal <= $ibeg + 1
                        || $is_semicolon_terminated )
                    {
                        $adjust_indentation = 2;
                    }
                    else {
                        $adjust_indentation = 0;
                    }
                }
                elsif ( $cti == 2 ) {
                    if ($is_semicolon_terminated) {
                        $adjust_indentation = 3;
                    }
                    else {
                        $adjust_indentation = 0;
                    }
                }
                elsif ( $cti == 3 ) {
                    $adjust_indentation = 3;
                }
            }

            # handle option to indent blocks
            else {
                if (
                    $rOpts->{'indent-closing-brace'}
                    && (
                        $i_terminal == $ibeg    #  isolated terminal '}'
                        || $is_semicolon_terminated
                    )
                  )                             #  } xxxx ;
                {
                    $adjust_indentation = 3;
                }
            }
        }

        # if at ');', '};', '>;', and '];' of a terminal qw quote
        elsif ($$rpatterns[0] =~ /^qb*;$/
            && $$rfields[0] =~ /^([\)\}\]\>]);$/ )
        {

            # $1 is the closing character captured from the field above
            if ( $closing_token_indentation{$1} == 0 ) {
                $adjust_indentation = 1;
            }
            else {
                $adjust_indentation = 3;
            }
        }

        # if line begins with a ':', align it with any
        # previous line leading with corresponding ?
        elsif ( $types_to_go[$ibeg] eq ':' ) {
            (
                $opening_indentation, $opening_offset,
                $is_leading,          $opening_exists
              )
              = get_opening_indentation( $ibeg, $ri_first, $ri_last,
                $rindentation_list );
            if ($is_leading) { $adjust_indentation = 2; }
        }

        ##########################################################
        # Section 2: set indentation according to flag set above
        #
        # Select the indentation object to define leading
        # whitespace.  If we are outdenting something like '} } );'
        # then we want to use one level below the last token
        # ($i_terminal) in order to get it to fully outdent through
        # all levels.
        ##########################################################
        my $indentation;
        my $lev;
        my $level_end = $levels_to_go[$iend];

        if ( $adjust_indentation == 0 ) {
            $indentation = $leading_spaces_to_go[$ibeg];
            $lev         = $levels_to_go[$ibeg];
        }
        elsif ( $adjust_indentation == 1 ) {
            $indentation = $reduced_spaces_to_go[$i_terminal];
            $lev         = $levels_to_go[$i_terminal];
        }

        # handle indented closing token which aligns with opening token
        elsif ( $adjust_indentation == 2 ) {

            # handle option to align closing token with opening token
            $lev = $levels_to_go[$ibeg];

            # calculate spaces needed to align with opening token
            my $space_count =
              get_SPACES($opening_indentation) + $opening_offset;

            # Indent less than the previous line.
            #
            # Problem: For -lp we don't exactly know what it was if there
            # were recoverable spaces sent to the aligner.  A good solution
            # would be to force a flush of the vertical alignment buffer, so
            # that we would know.  For now, this rule is used for -lp:
            #
            # When the last line did not start with a closing token we will
            # be optimistic that the aligner will recover everything wanted.
            #
            # This rule will prevent us from breaking a hierarchy of closing
            # tokens, and in a worst case will leave a closing paren too far
            # indented, but this is better than frequently leaving it not
            # indented enough.
            my $last_spaces = get_SPACES($last_indentation_written);
            if ( $last_leading_token !~ /^[\}\]\)]$/ ) {
                $last_spaces +=
                  get_RECOVERABLE_SPACES($last_indentation_written);
            }

            # reset the indentation to the new space count if it works
            # only options are all or none: nothing in-between looks good
            $lev = $levels_to_go[$ibeg];
            if ( $space_count < $last_spaces ) {
                if ($rOpts_line_up_parentheses) {

                    # NOTE(review): this inner 'my $lev' shadows the outer
                    # $lev, which was just assigned the same value above.
                    my $lev = $levels_to_go[$ibeg];
                    $indentation =
                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
                }
                else {
                    $indentation = $space_count;
                }
            }

            # revert to default if it doesn't work
            else {
                $space_count = leading_spaces_to_go($ibeg);
                if ( $default_adjust_indentation == 0 ) {
                    $indentation = $leading_spaces_to_go[$ibeg];
                }
                elsif ( $default_adjust_indentation == 1 ) {
                    $indentation = $reduced_spaces_to_go[$i_terminal];
                    $lev         = $levels_to_go[$i_terminal];
                }
            }
        }

        # Full indentation of closing tokens (-icb and -icp or -cti=2)
        else {

            # handle -icb (indented closing code block braces)
            # Updated method for indented block braces: indent one full level if
            # there is no continuation indentation.  This will occur for major
            # structures such as sub, if, else, but not for things like map
            # blocks.
            #
            # Note: only code blocks without continuation indentation are
            # handled here (if, else, unless, ..). In the following snippet,
            # the terminal brace of the sort block will have continuation
            # indentation as shown so it will not be handled by the coding
            # here.  We would have to undo the continuation indentation to do
            # this, but it probably looks ok as is.  This is a possible future
            # update for semicolon terminated lines.
            #
            #     if ($sortby eq 'date' or $sortby eq 'size') {
            #         @files = sort {
            #             $file_data{$a}{$sortby} <=> $file_data{$b}{$sortby}
            #                 or $a cmp $b
            #                 } @files;
            #         }
            #
            if (   $block_type_to_go[$ibeg]
                && $ci_levels_to_go[$i_terminal] == 0 )
            {
                my $spaces = get_SPACES( $leading_spaces_to_go[$i_terminal] );
                $indentation = $spaces + $rOpts_indent_columns;

                # NOTE: for -lp we could create a new indentation object, but
                # there is probably no need to do it
            }

            # handle -icp and any -icb block braces which fall through above
            # test such as the 'sort' block mentioned above.
            else {

                # There are currently two ways to handle -icp...
                # One way is to use the indentation of the previous line:
                # $indentation = $last_indentation_written;

                # The other way is to use the indentation that the previous line
                # would have had if it hadn't been adjusted:
                $indentation = $last_unadjusted_indentation;

                # Current method: use the minimum of the two. This avoids
                # inconsistent indentation.
                if ( get_SPACES($last_indentation_written) <
                    get_SPACES($indentation) )
                {
                    $indentation = $last_indentation_written;
                }
            }

            # use previous indentation but use own level
            # to cause list to be flushed properly
            $lev = $levels_to_go[$ibeg];
        }

        # remember indentation except for multi-line quotes, which get
        # no indentation
        unless ( $ibeg == 0 && $starting_in_quote ) {
            $last_indentation_written    = $indentation;
            $last_unadjusted_indentation = $leading_spaces_to_go[$ibeg];
            $last_leading_token          = $tokens_to_go[$ibeg];
        }

        # be sure lines with leading closing tokens are not outdented more
        # than the line which contained the corresponding opening token.

        #############################################################
        # updated per bug report in alex_bug.pl: we must not
        # mess with the indentation of closing logical braces so
        # we must treat something like '} else {' as if it were
        # an isolated brace my $is_isolated_block_brace = (
        # $iend == $ibeg ) && $block_type_to_go[$ibeg];
        #############################################################
        my $is_isolated_block_brace = $block_type_to_go[$ibeg]
          && ( $iend == $ibeg
            || $is_if_elsif_else_unless_while_until_for_foreach{
                $block_type_to_go[$ibeg] } );

        # only do this for a ':' which is aligned with its leading '?'
        my $is_unaligned_colon = $types_to_go[$ibeg] eq ':' && !$is_leading;
        if (   defined($opening_indentation)
            && !$is_isolated_block_brace
            && !$is_unaligned_colon )
        {
            if ( get_SPACES($opening_indentation) > get_SPACES($indentation) ) {
                $indentation = $opening_indentation;
            }
        }

        # remember the indentation of each line of this batch
        push @{$rindentation_list}, $indentation;

        # outdent lines with certain leading tokens...
        if (

            # must be first word of this batch
            $ibeg == 0

            # and ...
            && (

                # certain leading keywords if requested
                (
                       $rOpts->{'outdent-keywords'}
                    && $types_to_go[$ibeg] eq 'k'
                    && $outdent_keyword{ $tokens_to_go[$ibeg] }
                )

                # or labels if requested
                || ( $rOpts->{'outdent-labels'} && $types_to_go[$ibeg] eq 'J' )

                # or static block comments if requested
                || (   $types_to_go[$ibeg] eq '#'
                    && $rOpts->{'outdent-static-block-comments'}
                    && $is_static_block_comment )
            )
          )

        {
            my $space_count = leading_spaces_to_go($ibeg);
            if ( $space_count > 0 ) {

                # outdent by one continuation indentation, but never
                # to a negative column
                $space_count -= $rOpts_continuation_indentation;
                $is_outdented_line = 1;
                if ( $space_count < 0 ) { $space_count = 0 }

                # do not promote a spaced static block comment to non-spaced;
                # this is not normally necessary but could be for some
                # unusual user inputs (such as -ci = -i)
                if ( $types_to_go[$ibeg] eq '#' && $space_count == 0 ) {
                    $space_count = 1;
                }

                if ($rOpts_line_up_parentheses) {
                    $indentation =
                      new_lp_indentation_item( $space_count, $lev, 0, 0, 0 );
                }
                else {
                    $indentation = $space_count;
                }
            }
        }

        return ( $indentation, $lev, $level_end, $terminal_type,
            $is_semicolon_terminated, $is_outdented_line );
    }
}

sub set_vertical_tightness_flags {

    my ( $n, $n_last_line, $ibeg, $iend, $ri_first, $ri_last ) = @_;

    # Define vertical tightness controls for the nth line of a batch.
    # We create an array of parameters which tell the vertical aligner
    # if we should combine this line with the next line to achieve the
    # desired vertical tightness.  The array of parameters contains:
    #
    #   [0] type: 1=is opening tok 2=is closing tok  3=is opening block brace
    #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
    #             if closing: spaces of padding to use
    #   [2] sequence number of container
    #   [3] valid flag: do not append if this flag is false. Will be
    #       true if appropriate -vt flag is set.  Otherwise, Will be
    #       made true only for 2 line container in parens with -lp
    #   [4] value of get_seqno for the first token of this line
    #   [5] value of get_seqno for the last token of this line
    #
    # Arguments:
    #   $n            - index of this line within the batch
    #   $n_last_line  - index of the last line of the batch
    #   $ibeg, $iend  - indexes of the first and last tokens of line $n
    #   $ri_first, $ri_last - references to the lists of first and last
    #       token indexes of each line; used here to locate line $n+1
    #
    # Returns a reference to the array of flags described above.
    #
    # These flags are used by sub set_leading_whitespace in
    # the vertical aligner

    my $rvertical_tightness_flags = [ 0, 0, 0, 0, 0, 0 ];

    # For non-BLOCK tokens, we will need to examine the next line
    # too, so we won't consider the last line.
    if ( $n < $n_last_line ) {

        # see if last token is an opening token...not a BLOCK...
        my $ibeg_next = $$ri_first[ $n + 1 ];
        my $token_end = $tokens_to_go[$iend];
        my $iend_next = $$ri_last[ $n + 1 ];
        if (
               $type_sequence_to_go[$iend]
            && !$block_type_to_go[$iend]
            && $is_opening_token{$token_end}
            && (
                $opening_vertical_tightness{$token_end} > 0

                # allow 2-line method call to be closed up
                || (   $rOpts_line_up_parentheses
                    && $token_end eq '('
                    && $iend > $ibeg
                    && $types_to_go[ $iend - 1 ] ne 'b' )
            )
          )
        {

            # avoid multiple jumps in nesting depth in one line if
            # requested
            my $ovt = $opening_vertical_tightness{$token_end};

            # NOTE(review): this redeclares $iend_next with the same
            # expression as the outer declaration above.
            my $iend_next = $$ri_last[ $n + 1 ];
            unless (
                $ovt < 2
                && ( $nesting_depth_to_go[ $iend_next + 1 ] !=
                    $nesting_depth_to_go[$ibeg_next] )
              )
            {

                # If -vt flag has not been set, mark this as invalid
                # and aligner will validate it if it sees the closing paren
                # within 2 lines.
                my $valid_flag = $ovt;
                @{$rvertical_tightness_flags} =
                  ( 1, $ovt, $type_sequence_to_go[$iend], $valid_flag );
            }
        }

        # see if first token of next line is a closing token...
        # ..and be sure this line does not have a side comment
        my $token_next = $tokens_to_go[$ibeg_next];
        if (   $type_sequence_to_go[$ibeg_next]
            && !$block_type_to_go[$ibeg_next]
            && $is_closing_token{$token_next}
            && $types_to_go[$iend] !~ '#' )    # for safety, shouldn't happen!
        {
            my $ovt = $opening_vertical_tightness{$token_next};
            my $cvt = $closing_vertical_tightness{$token_next};
            if (

                # never append a trailing line like   )->pack(
                # because it will throw off later alignment
                (
                    $nesting_depth_to_go[$ibeg_next] ==
                    $nesting_depth_to_go[ $iend_next + 1 ] + 1
                )
                && (
                    $cvt == 2
                    || (
                        $container_environment_to_go[$ibeg_next] ne 'LIST'
                        && (
                            $cvt == 1

                            # allow closing up 2-line method calls
                            || (   $rOpts_line_up_parentheses
                                && $token_next eq ')' )
                        )
                    )
                )
              )
            {

                # decide which trailing closing tokens to append..
                my $ok = 0;
                if ( $cvt == 2 || $iend_next == $ibeg_next ) { $ok = 1 }
                else {

                    # join the types of the two tokens which follow
                    # the closing token
                    my $str = join( '',
                        @types_to_go[ $ibeg_next + 1 .. $ibeg_next + 2 ] );

                    # append closing token if followed by comment or ';'
                    if ( $str =~ /^b?[#;]/ ) { $ok = 1 }
                }

                if ($ok) {
                    my $valid_flag = $cvt;
                    @{$rvertical_tightness_flags} = (
                        2,
                        $tightness{$token_next} == 2 ? 0 : 1,
                        $type_sequence_to_go[$ibeg_next], $valid_flag,
                    );
                }
            }
        }

        # Opening Token Right
        # If requested, move an isolated trailing opening token to the end of
        # the previous line which ended in a comma.  We could do this
        # in sub recombine_breakpoints but that would cause problems
        # with -lp formatting.  The problem is that indentation will
        # quickly move far to the right in nested expressions.  By
        # doing it after indentation has been set, we avoid changes
        # to the indentation.  Actual movement of the token takes place
        # in sub write_leader_and_string.
        if (
            $opening_token_right{ $tokens_to_go[$ibeg_next] }

            # previous line is not opening
            # (use -sot to combine with it)
            && !$is_opening_token{$token_end}

            # previous line ended in one of these
            # (add other cases if necessary; '=>' and '.' are not necessary
            ##&& ($is_opening_token{$token_end} || $token_end eq ',')
            && !$block_type_to_go[$ibeg_next]

            # this is a line with just an opening token
            && (   $iend_next == $ibeg_next
                || $iend_next == $ibeg_next + 2
                && $types_to_go[$iend_next] eq '#' )

            # looks bad if we align vertically with the wrong container
            && $tokens_to_go[$ibeg] ne $tokens_to_go[$ibeg_next]
          )
        {
            my $valid_flag = 1;

            # one space of padding if a blank precedes the token
            my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ? 1 : 0;
            @{$rvertical_tightness_flags} = (
                2, $spaces, $type_sequence_to_go[$ibeg_next], $valid_flag,
            );
        }

        # Stacking of opening and closing tokens
        my $stackable;
        my $token_beg_next = $tokens_to_go[$ibeg_next];

        # patch to make something like 'qw(' behave like an opening paren
        # (aran.t)
        if ( $types_to_go[$ibeg_next] eq 'q' ) {
            if ( $token_beg_next =~ /^qw\s*([\[\(\{])$/ ) {
                $token_beg_next = $1;
            }
        }

        if (   $is_closing_token{$token_end}
            && $is_closing_token{$token_beg_next} )
        {
            $stackable = $stack_closing_token{$token_beg_next}
              unless ( $block_type_to_go[$ibeg_next] )
              ;    # shouldn't happen; just checking
        }
        elsif ($is_opening_token{$token_end}
            && $is_opening_token{$token_beg_next} )
        {
            $stackable = $stack_opening_token{$token_beg_next}
              unless ( $block_type_to_go[$ibeg_next] )
              ;    # shouldn't happen; just checking
        }

        if ($stackable) {

            # the semicolon test is only made when the next line is
            # the last line of the batch
            my $is_semicolon_terminated;
            if ( $n + 1 == $n_last_line ) {
                my ( $terminal_type, $i_terminal ) = terminal_type(
                    \@types_to_go, \@block_type_to_go,
                    $ibeg_next,    $iend_next
                );
                $is_semicolon_terminated = $terminal_type eq ';'
                  && $nesting_depth_to_go[$iend_next] <
                  $nesting_depth_to_go[$ibeg_next];
            }

            # this must be a line with just an opening token
            # or end in a semicolon
            if (
                $is_semicolon_terminated
                || (   $iend_next == $ibeg_next
                    || $iend_next == $ibeg_next + 2
                    && $types_to_go[$iend_next] eq '#' )
              )
            {
                my $valid_flag = 1;
                my $spaces = ( $types_to_go[ $ibeg_next - 1 ] eq 'b' ) ?
1 : 0;
                @{$rvertical_tightness_flags} = (
                    2, $spaces, $type_sequence_to_go[$ibeg_next],
                    $valid_flag,
                );
            }
        }
    }

    # Check for a last line with isolated opening BLOCK curly
    elsif ($rOpts_block_brace_vertical_tightness
        && $ibeg eq $iend
        && $types_to_go[$iend] eq '{'
        && $block_type_to_go[$iend] =~
        /$block_brace_vertical_tightness_pattern/o )
    {
        @{$rvertical_tightness_flags} =
          ( 3, $rOpts_block_brace_vertical_tightness, 0, 1 );
    }

    # pack in the sequence numbers of the ends of this line
    $rvertical_tightness_flags->[4] = get_seqno($ibeg);
    $rvertical_tightness_flags->[5] = get_seqno($iend);
    return $rvertical_tightness_flags;
}

sub get_seqno {

    # Return the sequence number of the token at index $ii for use by the
    # vertical aligner.  A qw quote is given the special value -1 so that
    # its opening and closing ends can be treated somewhat like opening
    # and closing container tokens when the aligner stacks tokens.
    my ($ii) = @_;

    my $SEQ_QW = -1;
    my $seqno  = $type_sequence_to_go[$ii];

    # anything which is not a qw quote just reports its own number
    return ($seqno) unless ( $types_to_go[$ii] eq 'q' );

    # a qw quote past the first token of the batch: look for its opening end
    if ( $ii > 0 ) {
        return ($SEQ_QW) if ( $tokens_to_go[$ii] =~ /^qw\s*[\(\{\[]/ );
    }

    # a qw quote at the first token: look for its closing end, unless
    # the batch ends inside a quote
    elsif ( !$ending_in_quote ) {
        return ($SEQ_QW) if ( $tokens_to_go[$ii] =~ /[\)\}\]]$/ );
    }

    return ($seqno);
}

{
    my %is_vertical_alignment_type;
    my %is_vertical_alignment_keyword;

    BEGIN {

        @_ = qw#
          = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
          { ? : => =~ && || // ~~ !~~
          #;
        @is_vertical_alignment_type{@_} = (1) x scalar(@_);

        @_ = qw(if unless and or err eq ne for foreach while until);
        @is_vertical_alignment_keyword{@_} = (1) x scalar(@_);
    }

    sub set_vertical_alignment_markers {

        # This routine takes the first step toward vertical alignment of the
        # lines of output text.  It looks for certain tokens which can serve as
        # vertical alignment markers (such as an '=').
        #
        # Method: We look at each token $i in this output batch and set
        # $matching_token_to_go[$i] equal to those tokens at which we would
        # accept vertical alignment.
# nothing to do if we aren't allowed to change whitespace
        # (every marker of the batch is cleared before returning)
        if ( !$rOpts_add_whitespace ) {
            for my $i ( 0 .. $max_index_to_go ) {
                $matching_token_to_go[$i] = '';
            }
            return;
        }

        # Arguments: references to the lists of the first and last token
        # indexes of each output line in this batch
        my ( $ri_first, $ri_last ) = @_;

        # remember the index of last nonblank token before any sidecomment
        # (step back over the side comment, then over a blank before it)
        my $i_terminal = $max_index_to_go;
        if ( $types_to_go[$i_terminal] eq '#' ) {
            if ( $i_terminal > 0 && $types_to_go[ --$i_terminal ] eq 'b' ) {
                if ( $i_terminal > 0 ) { --$i_terminal }
            }
        }

        # look at each line of this batch..
        my $last_vertical_alignment_before_index;
        my $vert_last_nonblank_type;
        my $vert_last_nonblank_token;
        my $vert_last_nonblank_block_type;
        my $max_line = @$ri_first - 1;
        my ( $i, $type, $token, $block_type, $alignment_type );
        my ( $ibeg, $iend, $line );

        foreach $line ( 0 .. $max_line ) {

            # the per-line state is reset at the start of every line
            $ibeg                                 = $$ri_first[$line];
            $iend                                 = $$ri_last[$line];
            $last_vertical_alignment_before_index = -1;
            $vert_last_nonblank_type              = '';
            $vert_last_nonblank_token             = '';
            $vert_last_nonblank_block_type        = '';

            # look at each token in this output line..
            foreach $i ( $ibeg .. $iend ) {
                $alignment_type = '';
                $type           = $types_to_go[$i];
                $block_type     = $block_type_to_go[$i];
                $token          = $tokens_to_go[$i];

                # check for flag indicating that we should not align
                # this token
                if ( $matching_token_to_go[$i] ) {
                    $matching_token_to_go[$i] = '';
                    next;
                }

                #--------------------------------------------------------
                # First see if we want to align BEFORE this token
                #--------------------------------------------------------

                # The first possible token that we can align before
                # is index 2 because: 1) it doesn't normally make sense to
                # align before the first token and 2) the second
                # token must be a blank if we are to align before
                # the third
                if ( $i < $ibeg + 2 ) { }

                # must follow a blank token
                elsif ( $types_to_go[ $i - 1 ] ne 'b' ) { }

                # align a side comment --
                elsif ( $type eq '#' ) {

                    unless (

                        # it is a static side comment
                        (
                               $rOpts->{'static-side-comments'}
                            && $token =~ /$static_side_comment_pattern/o
                        )

                        # or a closing side comment
                        || (   $vert_last_nonblank_block_type
                            && $token =~
                            /$closing_side_comment_prefix_pattern/o )
                      )
                    {
                        $alignment_type = $type;
                    }    ## Example of a static side comment
                }

                # otherwise, do not align two in a row to create a
                # blank field
                elsif ( $last_vertical_alignment_before_index == $i - 2 ) { }

                # align before one of these keywords
                # (within a line, since $i>1)
                elsif ( $type eq 'k' ) {

                    #  /^(if|unless|and|or|eq|ne)$/
                    if ( $is_vertical_alignment_keyword{$token} ) {
                        $alignment_type = $token;
                    }
                }

                # align before one of these types..
                # Note: add '.' after new vertical aligner is operational
                elsif ( $is_vertical_alignment_type{$type} ) {
                    $alignment_type = $token;

                    # Do not align a terminal token.  Although it might
                    # occasionally look ok to do this, it has been found to be
                    # a good general rule.  The main problems are:
                    # (1) that the terminal token (such as an = or :) might get
                    # moved far to the right where it is hard to see because
                    # nothing follows it, and
                    # (2) doing so may prevent other good alignments.
                    if ( $i == $iend || $i >= $i_terminal ) {
                        $alignment_type = "";
                    }

                    # Do not align leading ': (' or '. ('.  This would prevent
                    # alignment in something like the following:
                    #   $extra_space .=
                    #       ( $input_line_number < 10 )  ? "  "
                    #     : ( $input_line_number < 100 ) ? " "
                    #     :                                "";
                    # or
                    #  $code =
                    #      ( $case_matters ? $accessor : " lc($accessor) " )
                    #    . ( $yesno        ? " eq "       : " ne " )
                    if (   $i == $ibeg + 2
                        && $types_to_go[$ibeg] =~ /^[\.\:]$/
                        && $types_to_go[ $i - 1 ] eq 'b' )
                    {
                        $alignment_type = "";
                    }

                    # For a paren after keyword, only align something like this:
                    #    if    ( $a ) { &a }
                    #    elsif ( $b ) { &b }
                    if ( $token eq '(' && $vert_last_nonblank_type eq 'k' ) {
                        $alignment_type = ""
                          unless $vert_last_nonblank_token =~
                              /^(if|unless|elsif)$/;
                    }

                    # be sure the alignment tokens are unique
                    # This didn't work well: reason not determined
                    # if ($token ne $type) {$alignment_type .= $type}
                }

                # NOTE: This is deactivated because it causes the previous
                # if/elsif alignment to fail
                #elsif ( $type eq '}' && $token eq '}' && $block_type_to_go[$i])
                #{ $alignment_type = $type; }

                # remember where the latest "align before" marker was set so
                # that the "two in a row" test above can see it
                if ($alignment_type) {
                    $last_vertical_alignment_before_index = $i;
                }

                #--------------------------------------------------------
                # Next see if we want to align AFTER the previous nonblank
                #--------------------------------------------------------

                # We want to line up ',' and interior ';' tokens, with the added
                # space AFTER these tokens.  (Note: interior ';' is included
                # because it may occur in short blocks).
if (

                    # we haven't already set it
                    !$alignment_type

                    # and its not the first token of the line
                    && ( $i > $ibeg )

                    # and it follows a blank
                    && $types_to_go[ $i - 1 ] eq 'b'

                    # and previous token IS one of these:
                    && ( $vert_last_nonblank_type =~ /^[\,\;]$/ )

                    # and it's NOT one of these
                    && ( $type !~ /^[b\#\)\]\}]$/ )

                    # then go ahead and align
                  )

                {
                    $alignment_type = $vert_last_nonblank_type;
                }

                #--------------------------------------------------------
                # then store the value
                #--------------------------------------------------------
                $matching_token_to_go[$i] = $alignment_type;
                if ( $type ne 'b' ) {
                    $vert_last_nonblank_type       = $type;
                    $vert_last_nonblank_token      = $token;
                    $vert_last_nonblank_block_type = $block_type;
                }
            }
        }
    }
}

sub terminal_type {

    # Find the type of the last significant token (the terminal token) of
    # the line spanning indexes $ibeg .. $iend:
    #    - '#' if the line is a full-line comment
    #    - ' ' if the line holds nothing but blanks
    #    - otherwise the type of the final token, ignoring trailing blanks
    #      and any side comment
    # In list context the index of that token is returned as well.

    my ( $rtype, $rblock_type, $ibeg, $iend ) = @_;

    # a line which starts with a comment is a full-line comment
    my $first_type = $rtype->[$ibeg];
    if ( $first_type eq '#' ) {
        return wantarray ? ( $first_type, $ibeg ) : $first_type;
    }

    # walk backwards from the end over any side comment and blanks
    my $i_last = $iend;
    while ( $i_last >= $ibeg ) {
        my $candidate = $rtype->[$i_last];
        last unless ( $candidate eq 'b' || $candidate eq '#' );
        $i_last--;
    }

    # nothing significant was found: this is an empty line
    if ( $i_last < $ibeg ) {
        return wantarray ? ( ' ', $ibeg ) : ' ';
    }

    # A closing brace only counts as terminal if it ends a BLOCK.  A
    # terminal } after sort/grep/map is also hidden because it is not
    # necessarily the end of the line.  (terminal.t)
    my $terminal_type = $rtype->[$i_last];
    if ( $terminal_type eq '}' ) {
        my $closing_block_type = $rblock_type->[$i_last];
        if (  !$closing_block_type
            || $is_sort_map_grep_eval_do{$closing_block_type} )
        {
            $terminal_type = 'b';
        }
    }
    return wantarray ? ( $terminal_type, $i_last ) : $terminal_type;
}

{
    my %is_good_keyword_breakpoint;
    my %is_lt_gt_le_ge;

    sub set_bond_strengths {

        BEGIN {

            @_ = qw(if unless while until for foreach);
            @is_good_keyword_breakpoint{@_} = (1) x scalar(@_);

            @_ = qw(lt gt le ge);
            @is_lt_gt_le_ge{@_} = (1) x scalar(@_);

            ###############################################################
            # NOTE: NO_BREAK's set here are HINTS which may not be honored;
            # essential NO_BREAKS's must be enforced in section 2, below.
            ###############################################################

            # adding NEW_TOKENS: add a left and right bond strength by
            # mimmicking what is done for an existing token type.  You
            # can skip this step at first and take the default, then
            # tweak later to get desired results.

            # The bond strengths should roughly follow precenence order where
            # possible.  If you make changes, please check the results very
            # carefully on a variety of scripts.

            # no break around possible filehandle
            $left_bond_strength{'Z'}  = NO_BREAK;
            $right_bond_strength{'Z'} = NO_BREAK;

            # never put a bare word on a new line:
            # example print (STDERR, "bla"); will fail with break after (
            $left_bond_strength{'w'} = NO_BREAK;

        # blanks always have infinite strength to force breaks after real tokens
            $right_bond_strength{'b'} = NO_BREAK;

            # try not to break on exponentation
            @_ = qw" ** .. ...
<=> "; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = (STRONG) x scalar(@_); # The comma-arrow has very low precedence but not a good break point $left_bond_strength{'=>'} = NO_BREAK; $right_bond_strength{'=>'} = NOMINAL; # ok to break after label $left_bond_strength{'J'} = NO_BREAK; $right_bond_strength{'J'} = NOMINAL; $left_bond_strength{'j'} = STRONG; $right_bond_strength{'j'} = STRONG; $left_bond_strength{'A'} = STRONG; $right_bond_strength{'A'} = STRONG; $left_bond_strength{'->'} = STRONG; $right_bond_strength{'->'} = VERY_STRONG; # breaking AFTER modulus operator is ok: @_ = qw" % "; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@_); # Break AFTER math operators * and / @_ = qw" * / x "; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = (NOMINAL) x scalar(@_); # Break AFTER weakest math operators + and - # Make them weaker than * but a bit stronger than '.' 
@_ = qw" + - "; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@_); # breaking BEFORE these is just ok: @_ = qw" >> << "; @right_bond_strength{@_} = (STRONG) x scalar(@_); @left_bond_strength{@_} = (NOMINAL) x scalar(@_); # breaking before the string concatenation operator seems best # because it can be hard to see at the end of a line $right_bond_strength{'.'} = STRONG; $left_bond_strength{'.'} = 0.9 * NOMINAL + 0.1 * WEAK; @_ = qw"} ] ) "; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = (NOMINAL) x scalar(@_); # make these a little weaker than nominal so that they get # favored for end-of-line characters @_ = qw"!= == =~ !~ ~~ !~~"; @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@_); # break AFTER these @_ = qw" < > | & >= <="; @left_bond_strength{@_} = (VERY_STRONG) x scalar(@_); @right_bond_strength{@_} = ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@_); # breaking either before or after a quote is ok # but bias for breaking before a quote $left_bond_strength{'Q'} = NOMINAL; $right_bond_strength{'Q'} = NOMINAL + 0.02; $left_bond_strength{'q'} = NOMINAL; $right_bond_strength{'q'} = NOMINAL; # starting a line with a keyword is usually ok $left_bond_strength{'k'} = NOMINAL; # we usually want to bond a keyword strongly to what immediately # follows, rather than leaving it stranded at the end of a line $right_bond_strength{'k'} = STRONG; $left_bond_strength{'G'} = NOMINAL; $right_bond_strength{'G'} = STRONG; # it is good to break AFTER various assignment operators @_ = qw( = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x= ); @left_bond_strength{@_} = (STRONG) x scalar(@_); @right_bond_strength{@_} = ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@_); # break BEFORE '&&' and '||' and '//' # set strength of '||' to same as '=' so that chains like # $a = $b || $c || $d will break before the first '||' 
$right_bond_strength{'||'} = NOMINAL; $left_bond_strength{'||'} = $right_bond_strength{'='}; # same thing for '//' $right_bond_strength{'//'} = NOMINAL; $left_bond_strength{'//'} = $right_bond_strength{'='}; # set strength of && a little higher than || $right_bond_strength{'&&'} = NOMINAL; $left_bond_strength{'&&'} = $left_bond_strength{'||'} + 0.1; $left_bond_strength{';'} = VERY_STRONG; $right_bond_strength{';'} = VERY_WEAK; $left_bond_strength{'f'} = VERY_STRONG; # make right strength of for ';' a little less than '=' # to make for contents break after the ';' to avoid this: # for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j += # $number_of_fields ) # and make it weaker than ',' and 'and' too $right_bond_strength{'f'} = VERY_WEAK - 0.03; # The strengths of ?/: should be somewhere between # an '=' and a quote (NOMINAL), # make strength of ':' slightly less than '?' to help # break long chains of ? : after the colons $left_bond_strength{':'} = 0.4 * WEAK + 0.6 * NOMINAL; $right_bond_strength{':'} = NO_BREAK; $left_bond_strength{'?'} = $left_bond_strength{':'} + 0.01; $right_bond_strength{'?'} = NO_BREAK; $left_bond_strength{','} = VERY_STRONG; $right_bond_strength{','} = VERY_WEAK; # Set bond strengths of certain keywords # make 'or', 'err', 'and' slightly weaker than a ',' $left_bond_strength{'and'} = VERY_WEAK - 0.01; $left_bond_strength{'or'} = VERY_WEAK - 0.02; $left_bond_strength{'err'} = VERY_WEAK - 0.02; $left_bond_strength{'xor'} = NOMINAL; $right_bond_strength{'and'} = NOMINAL; $right_bond_strength{'or'} = NOMINAL; $right_bond_strength{'err'} = NOMINAL; $right_bond_strength{'xor'} = STRONG; } # patch-its always ok to break at end of line $nobreak_to_go[$max_index_to_go] = 0; # adding a small 'bias' to strengths is a simple way to make a line # break at the first of a sequence of identical terms. 
For example, # to force long string of conditional operators to break with # each line ending in a ':', we can add a small number to the bond # strength of each ':' my $colon_bias = 0; my $amp_bias = 0; my $bar_bias = 0; my $and_bias = 0; my $or_bias = 0; my $dot_bias = 0; my $f_bias = 0; my $code_bias = -.01; my $type = 'b'; my $token = ' '; my $last_type; my $last_nonblank_type = $type; my $last_nonblank_token = $token; my $delta_bias = 0.0001; my $list_str = $left_bond_strength{'?'}; my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token, $next_nonblank_type, $next_token, $next_type, $total_nesting_depth, ); # preliminary loop to compute bond strengths for ( my $i = 0 ; $i <= $max_index_to_go ; $i++ ) { $last_type = $type; if ( $type ne 'b' ) { $last_nonblank_type = $type; $last_nonblank_token = $token; } $type = $types_to_go[$i]; # strength on both sides of a blank is the same if ( $type eq 'b' && $last_type ne 'b' ) { $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ]; next; } $token = $tokens_to_go[$i]; $block_type = $block_type_to_go[$i]; $i_next = $i + 1; $next_type = $types_to_go[$i_next]; $next_token = $tokens_to_go[$i_next]; $total_nesting_depth = $nesting_depth_to_go[$i_next]; $i_next_nonblank = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 ); $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; # Some token chemistry... The decision about where to break a # line depends upon a "bond strength" between tokens. The LOWER # the bond strength, the MORE likely a break. The strength # values are based on trial-and-error, and need to be tweaked # occasionally to get desired results. Things to keep in mind # are: # 1. relative strengths are important. small differences # in strengths can make big formatting differences. # 2. each indentation level adds one unit of bond strength # 3. a value of NO_BREAK makes an unbreakable bond # 4. a value of VERY_WEAK is the strength of a ',' # 5. 
values below NOMINAL are considered ok break points # 6. values above NOMINAL are considered poor break points # We are computing the strength of the bond between the current # token and the NEXT token. my $bond_str = VERY_STRONG; # a default, high strength #--------------------------------------------------------------- # section 1: # use minimum of left and right bond strengths if defined; # digraphs and trigraphs like to break on their left #--------------------------------------------------------------- my $bsr = $right_bond_strength{$type}; if ( !defined($bsr) ) { if ( $is_digraph{$type} || $is_trigraph{$type} ) { $bsr = STRONG; } else { $bsr = VERY_STRONG; } } # define right bond strengths of certain keywords if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) { $bsr = $right_bond_strength{$token}; } elsif ( $token eq 'ne' or $token eq 'eq' ) { $bsr = NOMINAL; } my $bsl = $left_bond_strength{$next_nonblank_type}; # set terminal bond strength to the nominal value # this will cause good preceding breaks to be retained if ( $i_next_nonblank > $max_index_to_go ) { $bsl = NOMINAL; } if ( !defined($bsl) ) { if ( $is_digraph{$next_nonblank_type} || $is_trigraph{$next_nonblank_type} ) { $bsl = WEAK; } else { $bsl = VERY_STRONG; } } # define right bond strengths of certain keywords if ( $next_nonblank_type eq 'k' && defined( $left_bond_strength{$next_nonblank_token} ) ) { $bsl = $left_bond_strength{$next_nonblank_token}; } elsif ($next_nonblank_token eq 'ne' or $next_nonblank_token eq 'eq' ) { $bsl = NOMINAL; } elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) { $bsl = 0.9 * NOMINAL + 0.1 * STRONG; } # Note: it might seem that we would want to keep a NO_BREAK if # either token has this value. This didn't work, because in an # arrow list, it prevents the comma from separating from the # following bare word (which is probably quoted by its arrow). # So necessary NO_BREAK's have to be handled as special cases # in the final section. $bond_str = ( $bsr < $bsl ) ? 
$bsr : $bsl; my $bond_str_1 = $bond_str; #--------------------------------------------------------------- # section 2: # special cases #--------------------------------------------------------------- # allow long lines before final { in an if statement, as in: # if (.......... # ..........) # { # # Otherwise, the line before the { tends to be too short. if ( $type eq ')' ) { if ( $next_nonblank_type eq '{' ) { $bond_str = VERY_WEAK + 0.03; } } elsif ( $type eq '(' ) { if ( $next_nonblank_type eq '{' ) { $bond_str = NOMINAL; } } # break on something like '} (', but keep this stronger than a ',' # example is in 'howe.pl' elsif ( $type eq 'R' or $type eq '}' ) { if ( $next_nonblank_type eq '(' ) { $bond_str = 0.8 * VERY_WEAK + 0.2 * WEAK; } } #----------------------------------------------------------------- # adjust bond strength bias #----------------------------------------------------------------- # TESTING: add any bias set by sub scan_list at old comma # break points. elsif ( $type eq ',' ) { $bond_str += $bond_strength_to_go[$i]; } elsif ( $type eq 'f' ) { $bond_str += $f_bias; $f_bias += $delta_bias; } # in long ?: conditionals, bias toward just one set per line (colon.t) elsif ( $type eq ':' ) { if ( !$want_break_before{$type} ) { $bond_str += $colon_bias; $colon_bias += $delta_bias; } } if ( $next_nonblank_type eq ':' && $want_break_before{$next_nonblank_type} ) { $bond_str += $colon_bias; $colon_bias += $delta_bias; } # if leading '.' is used, align all but 'short' quotes; # the idea is to not place something like "\n" on a single line. elsif ( $next_nonblank_type eq '.' ) { if ( $want_break_before{'.'} ) { unless ( $last_nonblank_type eq '.' 
&& ( length($token) <= $rOpts_short_concatenation_item_length ) && ( $token !~ /^[\)\]\}]$/ ) ) { $dot_bias += $delta_bias; } $bond_str += $dot_bias; } } elsif ($next_nonblank_type eq '&&' && $want_break_before{$next_nonblank_type} ) { $bond_str += $amp_bias; $amp_bias += $delta_bias; } elsif ($next_nonblank_type eq '||' && $want_break_before{$next_nonblank_type} ) { $bond_str += $bar_bias; $bar_bias += $delta_bias; } elsif ( $next_nonblank_type eq 'k' ) { if ( $next_nonblank_token eq 'and' && $want_break_before{$next_nonblank_token} ) { $bond_str += $and_bias; $and_bias += $delta_bias; } elsif ($next_nonblank_token =~ /^(or|err)$/ && $want_break_before{$next_nonblank_token} ) { $bond_str += $or_bias; $or_bias += $delta_bias; } # FIXME: needs more testing elsif ( $is_keyword_returning_list{$next_nonblank_token} ) { $bond_str = $list_str if ( $bond_str > $list_str ); } elsif ( $token eq 'err' && !$want_break_before{$token} ) { $bond_str += $or_bias; $or_bias += $delta_bias; } } if ( $type eq ':' && !$want_break_before{$type} ) { $bond_str += $colon_bias; $colon_bias += $delta_bias; } elsif ( $type eq '&&' && !$want_break_before{$type} ) { $bond_str += $amp_bias; $amp_bias += $delta_bias; } elsif ( $type eq '||' && !$want_break_before{$type} ) { $bond_str += $bar_bias; $bar_bias += $delta_bias; } elsif ( $type eq 'k' ) { if ( $token eq 'and' && !$want_break_before{$token} ) { $bond_str += $and_bias; $and_bias += $delta_bias; } elsif ( $token eq 'or' && !$want_break_before{$token} ) { $bond_str += $or_bias; $or_bias += $delta_bias; } } # keep matrix and hash indices together # but make them a little below STRONG to allow breaking open # something like {'some-word'}{'some-very-long-word'} at the }{ # (bracebrk.t) if ( ( $type eq ']' or $type eq 'R' ) && ( $next_nonblank_type eq '[' or $next_nonblank_type eq 'L' ) ) { $bond_str = 0.9 * STRONG + 0.1 * NOMINAL; } if ( $next_nonblank_token =~ /^->/ ) { # increase strength to the point where a break in the following # will 
be after the opening paren rather than at the arrow: # $a->$b($c); if ( $type eq 'i' ) { $bond_str = 1.45 * STRONG; } elsif ( $type =~ /^[\)\]\}R]$/ ) { $bond_str = 0.1 * STRONG + 0.9 * NOMINAL; } # otherwise make strength before an '->' a little over a '+' else { if ( $bond_str <= NOMINAL ) { $bond_str = NOMINAL + 0.01; } } } if ( $token eq ')' && $next_nonblank_token eq '[' ) { $bond_str = 0.2 * STRONG + 0.8 * NOMINAL; } # map1.t -- correct for a quirk in perl if ( $token eq '(' && $next_nonblank_type eq 'i' && $last_nonblank_type eq 'k' && $is_sort_map_grep{$last_nonblank_token} ) # /^(sort|map|grep)$/ ) { $bond_str = NO_BREAK; } # extrude.t: do not break before paren at: # -l pid_filename( if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) { $bond_str = NO_BREAK; } # good to break after end of code blocks if ( $type eq '}' && $block_type ) { $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias; $code_bias += $delta_bias; } if ( $type eq 'k' ) { # allow certain control keywords to stand out if ( $next_nonblank_type eq 'k' && $is_last_next_redo_return{$token} ) { $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK; } # Don't break after keyword my. This is a quick fix for a # rare problem with perl. 
An example is this line from file # Container.pm: # foreach my $question( Debian::DebConf::ConfigDb::gettree( $this->{'question'} ) ) if ( $token eq 'my' ) { $bond_str = NO_BREAK; } } # good to break before 'if', 'unless', etc if ( $is_if_brace_follower{$next_nonblank_token} ) { $bond_str = VERY_WEAK; } if ( $next_nonblank_type eq 'k' ) { # keywords like 'unless', 'if', etc, within statements # make good breaks if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) { $bond_str = VERY_WEAK / 1.05; } } # try not to break before a comma-arrow elsif ( $next_nonblank_type eq '=>' ) { if ( $bond_str < STRONG ) { $bond_str = STRONG } } #---------------------------------------------------------------------- # only set NO_BREAK's from here on #---------------------------------------------------------------------- if ( $type eq 'C' or $type eq 'U' ) { # use strict requires that bare word and => not be separated if ( $next_nonblank_type eq '=>' ) { $bond_str = NO_BREAK; } # Never break between a bareword and a following paren because # perl may give an error. For example, if a break is placed # between 'to_filehandle' and its '(' the following line will # give a syntax error [Carp.pm]: my( $no) =fileno( # to_filehandle( $in)) ; if ( $next_nonblank_token eq '(' ) { $bond_str = NO_BREAK; } } # use strict requires that bare word within braces not start new line elsif ( $type eq 'L' ) { if ( $next_nonblank_type eq 'w' ) { $bond_str = NO_BREAK; } } # in older version of perl, use strict can cause problems with # breaks before bare words following opening parens. 
For example, # this will fail under older versions if a break is made between # '(' and 'MAIL': # use strict; # open( MAIL, "a long filename or command"); # close MAIL; elsif ( $type eq '{' ) { if ( $token eq '(' && $next_nonblank_type eq 'w' ) { # but it's fine to break if the word is followed by a '=>' # or if it is obviously a sub call my $i_next_next_nonblank = $i_next_nonblank + 1; my $next_next_type = $types_to_go[$i_next_next_nonblank]; if ( $next_next_type eq 'b' && $i_next_nonblank < $max_index_to_go ) { $i_next_next_nonblank++; $next_next_type = $types_to_go[$i_next_next_nonblank]; } ##if ( $next_next_type ne '=>' ) { # these are ok: '->xxx', '=>', '(' # We'll check for an old breakpoint and keep a leading # bareword if it was that way in the input file. # Presumably it was ok that way. For example, the # following would remain unchanged: # # @months = ( # January, February, March, April, # May, June, July, August, # September, October, November, December, # ); # # This should be sufficient: if ( !$old_breakpoint_to_go[$i] && ( $next_next_type eq ',' || $next_next_type eq '}' ) ) { $bond_str = NO_BREAK; } } } elsif ( $type eq 'w' ) { if ( $next_nonblank_type eq 'R' ) { $bond_str = NO_BREAK; } # use strict requires that bare word and => not be separated if ( $next_nonblank_type eq '=>' ) { $bond_str = NO_BREAK; } } # in fact, use strict hates bare words on any new line. For # example, a break before the underscore here provokes the # wrath of use strict: # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) { elsif ( $type eq 'F' ) { $bond_str = NO_BREAK; } # use strict does not allow separating type info from trailing { } # testfile is readmail.pl elsif ( $type eq 't' or $type eq 'i' ) { if ( $next_nonblank_type eq 'L' ) { $bond_str = NO_BREAK; } } # Do not break between a possible filehandle and a ? or / and do # not introduce a break after it if there is no blank # (extrude.t) elsif ( $type eq 'Z' ) { # dont break.. 
if ( # if there is no blank and we do not want one. Examples: # print $x++ # do not break after $x # print HTML"HELLO" # break ok after HTML ( $next_type ne 'b' && defined( $want_left_space{$next_type} ) && $want_left_space{$next_type} == WS_NO ) # or we might be followed by the start of a quote || $next_nonblank_type =~ /^[\/\?]$/ ) { $bond_str = NO_BREAK; } } # Do not break before a possible file handle if ( $next_nonblank_type eq 'Z' ) { $bond_str = NO_BREAK; } # As a defensive measure, do not break between a '(' and a # filehandle. In some cases, this can cause an error. For # example, the following program works: # my $msg="hi!\n"; # print # ( STDOUT # $msg # ); # # But this program fails: # my $msg="hi!\n"; # print # ( # STDOUT # $msg # ); # # This is normally only a problem with the 'extrude' option if ( $next_nonblank_type eq 'Y' && $token eq '(' ) { $bond_str = NO_BREAK; } # Breaking before a ++ can cause perl to guess wrong. For # example the following line will cause a syntax error # with -extrude if we break between '$i' and '++' [fixstyle2] # print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) ); elsif ( $next_nonblank_type eq '++' ) { $bond_str = NO_BREAK; } # Breaking before a ? before a quote can cause trouble if # they are not separated by a blank. # Example: a syntax error occurs if you break before the ? here # my$logic=join$all?' && ':' || ',@regexps; # From: Professional_Perl_Programming_Code/multifind.pl elsif ( $next_nonblank_type eq '?' ) { $bond_str = NO_BREAK if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' ); } # Breaking before a . followed by a number # can cause trouble if there is no intervening space # Example: a syntax error occurs if you break before the .2 here # $str .= pack($endian.2, ensurrogate($ord)); # From: perl58/Unicode.pm elsif ( $next_nonblank_type eq '.' 
) { $bond_str = NO_BREAK if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' ); } # patch to put cuddled elses back together when on multiple # lines, as in: } \n else \n { \n if ($rOpts_cuddled_else) { if ( ( $token eq 'else' ) && ( $next_nonblank_type eq '{' ) || ( $type eq '}' ) && ( $next_nonblank_token eq 'else' ) ) { $bond_str = NO_BREAK; } } # keep '}' together with ';' if ( ( $token eq '}' ) && ( $next_nonblank_type eq ';' ) ) { $bond_str = NO_BREAK; } # never break between sub name and opening paren if ( ( $type eq 'w' ) && ( $next_nonblank_token eq '(' ) ) { $bond_str = NO_BREAK; } #--------------------------------------------------------------- # section 3: # now take nesting depth into account #--------------------------------------------------------------- # final strength incorporates the bond strength and nesting depth my $strength; if ( defined($bond_str) && !$nobreak_to_go[$i] ) { if ( $total_nesting_depth > 0 ) { $strength = $bond_str + $total_nesting_depth; } else { $strength = $bond_str; } } else { $strength = NO_BREAK; } # always break after side comment if ( $type eq '#' ) { $strength = 0 } $bond_strength_to_go[$i] = $strength; FORMATTER_DEBUG_FLAG_BOND && do { my $str = substr( $token, 0, 15 ); $str .= ' ' x ( 16 - length($str) ); print "BOND: i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str -> $strength \n"; }; } } } sub pad_array_to_go { # to simplify coding in scan_list and set_bond_strengths, it helps # to create some extra blank tokens at the end of the arrays $tokens_to_go[ $max_index_to_go + 1 ] = ''; $tokens_to_go[ $max_index_to_go + 2 ] = ''; $types_to_go[ $max_index_to_go + 1 ] = 'b'; $types_to_go[ $max_index_to_go + 2 ] = 'b'; $nesting_depth_to_go[ $max_index_to_go + 1 ] = $nesting_depth_to_go[$max_index_to_go]; # /^[R\}\)\]]$/ if ( $is_closing_type{ $types_to_go[$max_index_to_go] } ) { if ( $nesting_depth_to_go[$max_index_to_go] <= 0 ) { # shouldn't happen: unless ( 
get_saw_brace_error() ) {
                warning(
"Program bug in scan_list: hit nesting error which should have been caught\n"
                );
                report_definite_bug();
            }
        }
        else {

            # last token closes a container: the padding is one level up
            $nesting_depth_to_go[ $max_index_to_go + 1 ] -= 1;
        }
    }

    #    /^[L\{\(\[]$/
    elsif ( $is_opening_type{ $types_to_go[$max_index_to_go] } ) {

        # last token opens a container: the padding is one level deeper
        $nesting_depth_to_go[ $max_index_to_go + 1 ] += 1;
    }
}

{    # begin scan_list

    # State shared by scan_list and its helper subs in this closure.
    my (
        $block_type,                $current_depth,
        $depth,                     $i,
        $i_last_nonblank_token,     $last_colon_sequence_number,
        $last_nonblank_token,       $last_nonblank_type,
        $last_old_breakpoint_count, $minimum_depth,
        $next_nonblank_block_type,  $next_nonblank_token,
        $next_nonblank_type,        $old_breakpoint_count,
        $starting_breakpoint_count, $starting_depth,
        $token,                     $type,
        $type_sequence,
    );

    # Per-depth stacks, indexed by nesting depth.
    my (
        @breakpoint_stack,              @breakpoint_undo_stack,
        @comma_index,                   @container_type,
        @identifier_count_stack,        @index_before_arrow,
        @interrupted_list,              @item_count_stack,
        @last_comma_index,              @last_dot_index,
        @last_nonblank_type,            @old_breakpoint_count_stack,
        @opening_structure_index_stack, @rfor_semicolon_list,
        @has_old_logical_breakpoints,   @rand_or_list,
        @i_equals,
    );

    # routine to define essential variables when we go 'up' to
    # a new depth
    #
    # Argument: $depth, the depth just reached.  Nothing is done unless
    # $depth is below the current $minimum_depth; in that case
    # $minimum_depth is lowered and the per-depth entries are reset.
    sub check_for_new_minimum_depth {
        my $depth = shift;
        if ( $depth < $minimum_depth ) {

            $minimum_depth = $depth;

            # these arrays need not retain values between calls
            $breakpoint_stack[$depth]              = $starting_breakpoint_count;
            $container_type[$depth]                = "";
            $identifier_count_stack[$depth]        = 0;
            $index_before_arrow[$depth]            = -1;
            $interrupted_list[$depth]              = 1;
            $item_count_stack[$depth]              = 0;
            $last_nonblank_type[$depth]            = "";
            $opening_structure_index_stack[$depth] = -1;

            $breakpoint_undo_stack[$depth]       = undef;
            $comma_index[$depth]                 = undef;
            $last_comma_index[$depth]            = undef;
            $last_dot_index[$depth]              = undef;
            $old_breakpoint_count_stack[$depth]  = undef;
            $has_old_logical_breakpoints[$depth] = 0;
            $rand_or_list[$depth]                = [];
            $rfor_semicolon_list[$depth]         = [];
            $i_equals[$depth]                    = -1;

            # these arrays must retain values between calls
            # (so they are only initialized the first time a depth is seen)
            if ( !defined( $has_broken_sublist[$depth] ) ) {
                $dont_align[$depth]         = 0;
                $has_broken_sublist[$depth] = 0;
                $want_comma_break[$depth]   = 0;
            }
        }
    }

    # routine to decide which commas to break at within a container;
    # argument:
    #   $dd = depth of the container to process
    # returns:
    #   $bp_count = number of comma breakpoints set
    #   $do_not_break_apart = a flag indicating if container need not
    #      be broken open
    sub set_comma_breakpoints {

        my $dd                 = shift;
        my $bp_count           = 0;
        my $do_not_break_apart = 0;

        # anything to do?
        if ( $item_count_stack[$dd] ) {

            # handle commas not in containers...
            if ( $dont_align[$dd] ) {
                do_uncontained_comma_breaks($dd);
            }

            # handle commas within containers...
            else {

                # remember the count so the number of new breaks can be found
                my $fbc = $forced_breakpoint_count;

                # always open comma lists not preceded by keywords,
                # barewords, identifiers (that is, anything that doesn't
                # look like a function call)
                my $must_break_open = $last_nonblank_type[$dd] !~ /^[kwiU]$/;

                set_comma_breakpoints_do(
                    $dd,
                    $opening_structure_index_stack[$dd],
                    $i,
                    $item_count_stack[$dd],
                    $identifier_count_stack[$dd],
                    $comma_index[$dd],
                    $next_nonblank_type,
                    $container_type[$dd],
                    $interrupted_list[$dd],
                    \$do_not_break_apart,
                    $must_break_open,
                );
                $bp_count = $forced_breakpoint_count - $fbc;
                $do_not_break_apart = 0 if $must_break_open;
            }
        }
        return ( $bp_count, $do_not_break_apart );
    }

    sub do_uncontained_comma_breaks {

        # Handle commas not in containers...
        # This is a catch-all routine for commas that we
        # don't know what to do with because they don't fall
        # within containers.  We will bias the bond strength
        # to break at commas which ended lines in the input
        # file.  This usually works better than just trying
        # to put as many items on a line as possible.  A
        # downside is that if the input file is garbage it
        # won't work very well. However, the user can always
        # prevent following the old breakpoints with the
        # -iob flag.
        #
        # Argument: $dd, the depth whose comma list is processed.
        my $dd   = shift;
        my $bias = -.01;
        foreach my $ii ( @{ $comma_index[$dd] } ) {
            if ( $old_breakpoint_to_go[$ii] ) {

                # this value is later added to the comma's bond strength
                # by set_bond_strengths
                $bond_strength_to_go[$ii] = $bias;

                # reduce bias magnitude to force breaks in order
                $bias *= 0.99;
            }
        }

        # Also put a break before the first comma if
        # (1) there was a break there in the input, and
        # (2) there was exactly one previous break in the input
        #
        # For example, we will follow the user and break after
        # 'print' in this snippet:
        #    print
        #      "conformability (Not the same dimension)\n",
        #      "\t", $have, " is ", text_unit($hu), "\n",
        #      "\t", $want, " is ", text_unit($wu), "\n",
        #      ;
        my $i_first_comma = $comma_index[$dd]->[0];
        if ( $old_breakpoint_to_go[$i_first_comma] ) {
            my $level_comma = $levels_to_go[$i_first_comma];
            my $ibreak      = -1;
            my $obp_count   = 0;

            # scan backward from the first comma, counting old breakpoints;
            # the break is only used if it is at the comma's own level
            for ( my $ii = $i_first_comma - 1 ; $ii >= 0 ; $ii -= 1 ) {
                if ( $old_breakpoint_to_go[$ii] ) {
                    $obp_count++;
                    last if ( $obp_count > 1 );
                    $ibreak = $ii
                      if ( $levels_to_go[$ii] == $level_comma );
                }
            }
            if ( $ibreak >= 0 && $obp_count == 1 ) {
                set_forced_breakpoint($ibreak);
            }
        }
    }

    # Container types (the token preceding the opening token) which are
    # treated as holding a logical expression by set_logical_breakpoints
    # and scan_list.
    my %is_logical_container;

    BEGIN {
        @_ = qw# if elsif unless while and or err not && | || ? : ! #;
        @is_logical_container{@_} = (1) x scalar(@_);
    }

    # Force a break at every index recorded in $rfor_semicolon_list[$dd],
    # where $dd is the depth argument.
    sub set_for_semicolon_breakpoints {
        my $dd = shift;
        foreach ( @{ $rfor_semicolon_list[$dd] } ) {
            set_forced_breakpoint($_);
        }
    }

    # Force breaks at the logical operators recorded for depth $dd.
    # Breaks are set for the first non-empty operator class only, and also
    # at any recorded 'if' / 'unless'; the list for the depth is then
    # cleared.
    sub set_logical_breakpoints {
        my $dd = shift;
        if (
               $item_count_stack[$dd] == 0
            && $is_logical_container{ $container_type[$dd] }

            # TESTING:
            || $has_old_logical_breakpoints[$dd]
          )
        {

            # Look for breaks in this order:
            # 0   1    2   3
            # or  and  ||  &&
            # NOTE: this lexical $i shadows the closure-level $i inside
            # the loop.
            foreach my $i ( 0 .. 3 ) {
                if ( $rand_or_list[$dd][$i] ) {
                    foreach ( @{ $rand_or_list[$dd][$i] } ) {
                        set_forced_breakpoint($_);
                    }

                    # break at any 'if' and 'unless' too
                    foreach ( @{ $rand_or_list[$dd][4] } ) {
                        set_forced_breakpoint($_);
                    }
                    $rand_or_list[$dd] = [];
                    last;
                }
            }
        }
    }

    sub is_unbreakable_container {

        # never break a container of one of these types
        # because bad things can happen (map1.t)
        #
        # Returns the %is_sort_map_grep entry for the container type at
        # depth $dd (the value of the last statement is the return value).
        my $dd = shift;
        $is_sort_map_grep{ $container_type[$dd] };
    }

    sub scan_list {

        # This routine is responsible for setting line breaks for all lists,
        # so that hierarchical structure can be displayed and so that list
        # items can be vertically aligned.  The output of this routine is
        # stored in the array @forced_breakpoint_to_go, which is used to set
        # final breakpoints.

        $starting_depth = $nesting_depth_to_go[0];

        $block_type                 = ' ';
        $current_depth              = $starting_depth;
        $i                          = -1;
        $last_colon_sequence_number = -1;
        $last_nonblank_token        = ';';
        $last_nonblank_type         = ';';
        $last_nonblank_block_type   = ' ';
        $last_old_breakpoint_count  = 0;
        $minimum_depth = $current_depth + 1;    # forces update in check below
        $old_breakpoint_count      = 0;
        $starting_breakpoint_count = $forced_breakpoint_count;
        $token                     = ';';
        $type                      = ';';
        $type_sequence             = '';

        check_for_new_minimum_depth($current_depth);

        my $is_long_line = excess_line_length( 0, $max_index_to_go ) > 0;
        my $want_previous_breakpoint = -1;

        my $saw_good_breakpoint;
        my $i_line_end   = -1;
        my $i_line_start = -1;

        # loop over all tokens in this batch
        while ( ++$i <= $max_index_to_go ) {
            if ( $type ne 'b' ) {
                $i_last_nonblank_token    = $i - 1;
                $last_nonblank_type       = $type;
                $last_nonblank_token      = $token;
                $last_nonblank_block_type = $block_type;
            }
            $type          = $types_to_go[$i];
            $block_type    = $block_type_to_go[$i];
            $token         = $tokens_to_go[$i];
            $type_sequence = $type_sequence_to_go[$i];
            my $next_type       = $types_to_go[ $i + 1 ];
            my $next_token      = $tokens_to_go[ $i + 1 ];
            my $i_next_nonblank = ( ( $next_type eq 'b' ) ?
$i + 2 : $i + 1 ); $next_nonblank_type = $types_to_go[$i_next_nonblank]; $next_nonblank_token = $tokens_to_go[$i_next_nonblank]; $next_nonblank_block_type = $block_type_to_go[$i_next_nonblank]; # set break if flag was set if ( $want_previous_breakpoint >= 0 ) { set_forced_breakpoint($want_previous_breakpoint); $want_previous_breakpoint = -1; } $last_old_breakpoint_count = $old_breakpoint_count; if ( $old_breakpoint_to_go[$i] ) { $i_line_end = $i; $i_line_start = $i_next_nonblank; $old_breakpoint_count++; # Break before certain keywords if user broke there and # this is a 'safe' break point. The idea is to retain # any preferred breaks for sequential list operations, # like a schwartzian transform. if ($rOpts_break_at_old_keyword_breakpoints) { if ( $next_nonblank_type eq 'k' && $is_keyword_returning_list{$next_nonblank_token} && ( $type =~ /^[=\)\]\}Riw]$/ || $type eq 'k' && $is_keyword_returning_list{$token} ) ) { # we actually have to set this break next time through # the loop because if we are at a closing token (such # as '}') which forms a one-line block, this break might # get undone. $want_previous_breakpoint = $i; } } } next if ( $type eq 'b' ); $depth = $nesting_depth_to_go[ $i + 1 ]; # safety check - be sure we always break after a comment # Shouldn't happen .. an error here probably means that the # nobreak flag did not get turned off correctly during # formatting. if ( $type eq '#' ) { if ( $i != $max_index_to_go ) { warning( "Non-fatal program bug: backup logic needed to break after a comment\n" ); report_definite_bug(); $nobreak_to_go[$i] = 0; set_forced_breakpoint($i); } } # Force breakpoints at certain tokens in long lines. # Note that such breakpoints will be undone later if these tokens # are fully contained within parens on a line. 
if ( # break before a keyword within a line $type eq 'k' && $i > 0 # if one of these keywords: && $token =~ /^(if|unless|while|until|for)$/ # but do not break at something like '1 while' && ( $last_nonblank_type ne 'n' || $i > 2 ) # and let keywords follow a closing 'do' brace && $last_nonblank_block_type ne 'do' && ( $is_long_line # or container is broken (by side-comment, etc) || ( $next_nonblank_token eq '(' && $mate_index_to_go[$i_next_nonblank] < $i ) ) ) { set_forced_breakpoint( $i - 1 ); } # remember locations of '||' and '&&' for possible breaks if we # decide this is a long logical expression. if ( $type eq '||' ) { push @{ $rand_or_list[$depth][2] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); } elsif ( $type eq '&&' ) { push @{ $rand_or_list[$depth][3] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); } elsif ( $type eq 'f' ) { push @{ $rfor_semicolon_list[$depth] }, $i; } elsif ( $type eq 'k' ) { if ( $token eq 'and' ) { push @{ $rand_or_list[$depth][1] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); } # break immediately at 'or's which are probably not in a logical # block -- but we will break in logical breaks below so that # they do not add to the forced_breakpoint_count elsif ( $token eq 'or' ) { push @{ $rand_or_list[$depth][0] }, $i; ++$has_old_logical_breakpoints[$depth] if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ); if ( $is_logical_container{ $container_type[$depth] } ) { } else { if ($is_long_line) { set_forced_breakpoint($i) } elsif ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ) { $saw_good_breakpoint = 1; } } } elsif ( $token eq 'if' || $token eq 'unless' ) { push @{ 
$rand_or_list[$depth][4] }, $i; if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_logical_breakpoints ) { set_forced_breakpoint($i); } } } elsif ( $is_assignment{$type} ) { $i_equals[$depth] = $i; } if ($type_sequence) { # handle any postponed closing breakpoints if ( $token =~ /^[\)\]\}\:]$/ ) { if ( $type eq ':' ) { $last_colon_sequence_number = $type_sequence; # TESTING: retain break at a ':' line break if ( ( $i == $i_line_start || $i == $i_line_end ) && $rOpts_break_at_old_ternary_breakpoints ) { # TESTING: set_forced_breakpoint($i); # break at previous '=' if ( $i_equals[$depth] > 0 ) { set_forced_breakpoint( $i_equals[$depth] ); $i_equals[$depth] = -1; } } } if ( defined( $postponed_breakpoint{$type_sequence} ) ) { my $inc = ( $type eq ':' ) ? 0 : 1; set_forced_breakpoint( $i - $inc ); delete $postponed_breakpoint{$type_sequence}; } } # set breaks at ?/: if they will get separated (and are # not a ?/: chain), or if the '?' is at the end of the # line elsif ( $token eq '?' ) { my $i_colon = $mate_index_to_go[$i]; if ( $i_colon <= 0 # the ':' is not in this batch || $i == 0 # this '?' is the first token of the line || $i == $max_index_to_go # or this '?' is the last token ) { # don't break at a '?' if preceded by ':' on # this line of previous ?/: pair on this line. # This is an attempt to preserve a chain of ?/: # expressions (elsif2.t). And don't break if # this has a side comment. set_forced_breakpoint($i) unless ( $type_sequence == ( $last_colon_sequence_number + TYPE_SEQUENCE_INCREMENT ) || $tokens_to_go[$max_index_to_go] eq '#' ); set_closing_breakpoint($i); } } } #print "LISTX sees: i=$i type=$type tok=$token block=$block_type depth=$depth\n"; #------------------------------------------------------------ # Handle Increasing Depth.. 
# # prepare for a new list when depth increases # token $i is a '(','{', or '[' #------------------------------------------------------------ if ( $depth > $current_depth ) { $breakpoint_stack[$depth] = $forced_breakpoint_count; $breakpoint_undo_stack[$depth] = $forced_breakpoint_undo_count; $has_broken_sublist[$depth] = 0; $identifier_count_stack[$depth] = 0; $index_before_arrow[$depth] = -1; $interrupted_list[$depth] = 0; $item_count_stack[$depth] = 0; $last_comma_index[$depth] = undef; $last_dot_index[$depth] = undef; $last_nonblank_type[$depth] = $last_nonblank_type; $old_breakpoint_count_stack[$depth] = $old_breakpoint_count; $opening_structure_index_stack[$depth] = $i; $rand_or_list[$depth] = []; $rfor_semicolon_list[$depth] = []; $i_equals[$depth] = -1; $want_comma_break[$depth] = 0; $container_type[$depth] = ( $last_nonblank_type =~ /^(k|=>|&&|\|\||\?|\:|\.)$/ ) ? $last_nonblank_token : ""; $has_old_logical_breakpoints[$depth] = 0; # if line ends here then signal closing token to break if ( $next_nonblank_type eq 'b' || $next_nonblank_type eq '#' ) { set_closing_breakpoint($i); } # Not all lists of values should be vertically aligned.. $dont_align[$depth] = # code BLOCKS are handled at a higher level ( $block_type ne "" ) # certain paren lists || ( $type eq '(' ) && ( # it does not usually look good to align a list of # identifiers in a parameter list, as in: # my($var1, $var2, ...) # (This test should probably be refined, for now I'm just # testing for any keyword) ( $last_nonblank_type eq 'k' ) # a trailing '(' usually indicates a non-list || ( $next_nonblank_type eq '(' ) ); # patch to outdent opening brace of long if/for/.. # statements (like this one). See similar coding in # set_continuation breaks. We have also catch it here for # short line fragments which otherwise will not go through # set_continuation_breaks. if ( $block_type # if we have the ')' but not its '(' in this batch.. 
&& ( $last_nonblank_token eq ')' ) && $mate_index_to_go[$i_last_nonblank_token] < 0 # and user wants brace to left && !$rOpts->{'opening-brace-always-on-right'} && ( $type eq '{' ) # should be true && ( $token eq '{' ) # should be true ) { set_forced_breakpoint( $i - 1 ); } } #------------------------------------------------------------ # Handle Decreasing Depth.. # # finish off any old list when depth decreases # token $i is a ')','}', or ']' #------------------------------------------------------------ elsif ( $depth < $current_depth ) { check_for_new_minimum_depth($depth); # force all outer logical containers to break after we see on # old breakpoint $has_old_logical_breakpoints[$depth] ||= $has_old_logical_breakpoints[$current_depth]; # Patch to break between ') {' if the paren list is broken. # There is similar logic in set_continuation_breaks for # non-broken lists. if ( $token eq ')' && $next_nonblank_block_type && $interrupted_list[$current_depth] && $next_nonblank_type eq '{' && !$rOpts->{'opening-brace-always-on-right'} ) { set_forced_breakpoint($i); } #print "LISTY sees: i=$i type=$type tok=$token block=$block_type depth=$depth next=$next_nonblank_type next_block=$next_nonblank_block_type inter=$interrupted_list[$current_depth]\n"; # set breaks at commas if necessary my ( $bp_count, $do_not_break_apart ) = set_comma_breakpoints($current_depth); my $i_opening = $opening_structure_index_stack[$current_depth]; my $saw_opening_structure = ( $i_opening >= 0 ); # this term is long if we had to break at interior commas.. 
my $is_long_term = $bp_count > 0; # ..or if the length between opening and closing parens exceeds # allowed line length if ( !$is_long_term && $saw_opening_structure ) { my $i_opening_minus = find_token_starting_list($i_opening); # Note: we have to allow for one extra space after a # closing token so that we do not strand a comma or # semicolon, hence the '>=' here (oneline.t) $is_long_term = excess_line_length( $i_opening_minus, $i ) >= 0; } # We've set breaks after all comma-arrows. Now we have to # undo them if this can be a one-line block # (the only breakpoints set will be due to comma-arrows) if ( # user doesn't require breaking after all comma-arrows ( $rOpts_comma_arrow_breakpoints != 0 ) # and if the opening structure is in this batch && $saw_opening_structure # and either on the same old line && ( $old_breakpoint_count_stack[$current_depth] == $last_old_breakpoint_count # or user wants to form long blocks with arrows || $rOpts_comma_arrow_breakpoints == 2 ) # and we made some breakpoints between the opening and closing && ( $breakpoint_undo_stack[$current_depth] < $forced_breakpoint_undo_count ) # and this block is short enough to fit on one line # Note: use < because need 1 more space for possible comma && !$is_long_term ) { undo_forced_breakpoint_stack( $breakpoint_undo_stack[$current_depth] ); } # now see if we have any comma breakpoints left my $has_comma_breakpoints = ( $breakpoint_stack[$current_depth] != $forced_breakpoint_count ); # update broken-sublist flag of the outer container $has_broken_sublist[$depth] = $has_broken_sublist[$depth] || $has_broken_sublist[$current_depth] || $is_long_term || $has_comma_breakpoints; # Having come to the closing ')', '}', or ']', now we have to decide if we # should 'open up' the structure by placing breaks at the opening and # closing containers. This is a tricky decision. 
Here are some of the # basic considerations: # # -If this is a BLOCK container, then any breakpoints will have already # been set (and according to user preferences), so we need do nothing here. # # -If we have a comma-separated list for which we can align the list items, # then we need to do so because otherwise the vertical aligner cannot # currently do the alignment. # # -If this container does itself contain a container which has been broken # open, then it should be broken open to properly show the structure. # # -If there is nothing to align, and no other reason to break apart, # then do not do it. # # We will not break open the parens of a long but 'simple' logical expression. # For example: # # This is an example of a simple logical expression and its formatting: # # if ( $bigwasteofspace1 && $bigwasteofspace2 # || $bigwasteofspace3 && $bigwasteofspace4 ) # # Most people would prefer this to the 'spacey' version: # # if ( # $bigwasteofspace1 && $bigwasteofspace2 # || $bigwasteofspace3 && $bigwasteofspace4 # ) # # To illustrate the rules for breaking logical expressions, consider: # # FULLY DENSE: # if ( $opt_excl # and ( exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc )) # # This is on the verge of being difficult to read. The current default is to # open it up like this: # # DEFAULT: # if ( # $opt_excl # and ( exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc ) # ) # # This is a compromise which tries to avoid being too dense and too spacey. # A more spaced version would be: # # SPACEY: # if ( # $opt_excl # and ( # exists $ids_excl_uc{$id_uc} # or grep $id_uc =~ /$_/, @ids_excl_uc # ) # ) # # Some people might prefer the spacey version -- an option could be added. The # innermost expression contains a long block '( exists $ids_... )'.
# # Here is how the logic goes: We will force a break at the 'or' that the # innermost expression contains, but we will not break apart its opening and # closing containers because (1) it contains no multi-line sub-containers itself, # and (2) there is no alignment to be gained by breaking it open like this # #