package Filter::Indent::HereDoc; use strict; use warnings; use Filter::Simple; our $VERSION = '1.01'; our %options = (); our @buffer; # Temporary storage of current here document our @termstring; # FIFO list of here document terminating strings sub import { %options = (); $options{$_}++ foreach (@_); } FILTER_ONLY executable => sub { my @code = split /\n/; $_ = join '',(map &process_line($_),@code); }; sub process_line { my $line = shift; if (@termstring) { # At this point we are in a here document, so all lines of code # are buffered until the end of the heredoc is detected push @buffer,$line; # 2 scenarios - terminator is a blank line, or terminator contains non- # whitespace. If blank line, then look for same whitespace at start of # each line in buffer. Otherwise take the whitespace that precedes the # terminator and match this against each line in the buffer. # # By default, we accept terminator strings in the Perl6 RFC111 format, # i.e. whitespace, ';', and comments following the terminator are # allowed. The only exception is if the terminator is a blank line, # in this case then only whitespace is allowed. my $termregex; unless ($options{strict_terminators}) { if ($termstring[0] =~ /\S/) { $termregex = qr/^(\s*)($termstring[0])(\s*;{0,1}\s*(?:#.*){0,1})$/; } else { $termregex = qr/^\s*$/; } } else { if ($termstring[0] =~ /\S/) { $termregex = qr/^(\s*)($termstring[0])$/; } else { $termregex = qr/^$/; } } my ($whitespace,$terminator,$extras); if ($line =~ $termregex) { ($whitespace,$terminator,$extras) = ($1,$2,$3); if ($termstring[0] =~ /\S/) { foreach (@buffer) { return unless (/^$whitespace/); } } else { # Terminator string is a blank line undef $whitespace; foreach (@buffer) { if (/^(\s+)\S/) { $whitespace = $1 unless ($whitespace and /^$whitespace\s*/); } } } # End of heredoc - strip the required amount of whitespace map s/^$whitespace//,@buffer; # If we found extra characters after the terminator (Perl6 RFC111 # style), move them onto a new line to be compatible with Perl5 if ($extras) { pop @buffer; push @buffer,$terminator; push @buffer,$extras; } # Return captured heredoc back to Perl and reset the buffer $line = join "\n",@buffer; @buffer = (); shift @termstring; return "$line\n"; } } else { # Perl6 RFC111 states that whitespace after the terminator # should be ingored unless ($options{strict_terminators}) { $line =~ s/(?'d, e.g. use Filter::Indent::HereDoc; print << EOT Hello, World! EOT ; # this will work use Filter::Indent::HereDoc 'strict_terminators'; print << EOT Hello, World! EOT ; # this will generate an error =head1 CAVEATS =over 4 =item * At present, Filter::Indent::HereDoc does not attempt to parse any of the Perl code, it just searches for the '<<' string to locate the start of a here document. Therefore if you need to write '<<' without starting a here document, you must first use C to stop the code from being filtered, e.g. use Filter::Indent::HereDoc; print <, L, L, Perl6 RFC111 =head1 AUTHOR Jon Allen, Ejj@jonallen.infoE =head1 THANKS TO Michael Schwern for the suggestions about Perl6 RFC111 =head1 COPYRIGHT AND LICENSE Copyright 2003 by Jon Allen This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut