# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Acme::Comment;

use strict;
use Filter::Simple;

### module version; a package variable so that 'use Acme::Comment VERSION'
### and Acme::Comment->VERSION can see it ###
our $VERSION = '1.02';

### file-wide state shared by import() and parse():
###   $TypeCount - number of user defined styles created so far; import()
###                increments it and uses the new value as the style's key
###   $Type      - key into $Conf of the style parse() applies, 'C' at start
###   $Conf      - hash reference holding every known comment style; filled
###                in right below
my( $TypeCount, $Type, $Conf ) = ( 0, 'C' );

### Build the table of known comment styles.
###
### Every entry is a hash reference. Styles with multi-line comments carry
### 'start' and 'end' (regex source for the delimiters) plus the 'own_line'
### and 'one_line' switches; styles with single-line comments carry 'single'
### (regex source for the marker). A style may have both.
{   no warnings;    ### presumably for the '#' inside the qw lists below

    ### entry with multi-line delimiters; the delimiters are taken literally,
    ### anything in @extra is stored as given
    my $block = sub {
        my( $open, $close, @extra ) = @_;

        return {
            own_line    => 1,
            start       => quotemeta $open,
            end         => quotemeta $close,
            one_line    => 0,
            @extra,
        };
    };

    ### entry with only a single-line marker, stored as given ###
    my $line_only = sub { return { single => shift } };

    $Conf = {
        ### styles with multi-line comments ###
        C           => $block->( '/*',          '*/'   ),
        HTML        => $block->( '<!--',        '-->'  ),
        PASCAL      => $block->( '(*',          '*)'   ),
        ALGOL       => $block->( "'comment'",   ';'    ),
        RUBY        => $block->( '=begin',      '=end', single => '#' ),
        JAVA        => $block->( '/*',          '*/',   single => quotemeta '//' ),

        ### a single '!' only counts when no backslash follows it,
        ### as that would be the multi-line opening tag
        HUGO        => $block->( '!\\',         '\!',   single => '!(?!\\\\)' ),

        ### styles with single-line comments only ###
        BASIC       => $line_only->( q['] ),
        PILOT       => $line_only->( quotemeta '\/\/' ),
        BLUE        => $line_only->( '(?:==)|(?:--)' ),
        INTERCAL    => $line_only->( '(?:\(\d+\)\s*)?DO NOTE THAT' ),
        FORTRAN     => $line_only->( quotemeta '!' ),
        PERL        => $line_only->( quotemeta q[#] ),
        ALAN        => $line_only->( "--" ),
        ORTHOGONAL  => $line_only->( quotemeta ";" ),
        FOCAL       => $line_only->( "comment" ),
        LATEX       => $line_only->( quotemeta "%" ),
        FOXBASE     => $line_only->( '(?:\*)|(?:&&)' ),
    };

    ### languages that comment exactly like one of the styles above;
    ### they point at the very same hash as the style they copy
    my %same_as = (
        BASIC       => [ qw|ADA| ],
        LATEX       => [ qw|POSTSCRIPT| ],
        ORTHOGONAL  => [ qw|ADVSYS LISP SCHEME| ],
        ALAN        => [ qw|EIFFEL HASKELL| ],
        PASCAL      => [ qw|BETA BLISS JOY VAR'AQ| ],
        C           => [ qw|B PL/I CHILL| ],
        JAVA        => [ qw|C++ PHP C# CLEAN ELASTIC GUILE| ],
        PERL        => [ qw|PYTHON PARROT AWK UNLAMBDA E ICON| ],
    );

    for my $base ( keys %same_as ) {
        $Conf->{$_} = $Conf->{$base} for @{ $same_as{$base} };
    }
}

### Process the arguments of 'use Acme::Comment ...' and select the comment
### style that parse() will apply.
###
### Arguments are key/value pairs:
###   type              - name of a predefined style (case insensitive)
###   start, end        - delimiters of a user defined multi-line comment
###   single            - marker of a user defined single-line comment
###   own_line/one_line - switches for multi-line comments
###
### Without arguments the default C style is selected. When 'type' is
### given, 'start', 'end' and 'single' are not looked at.
###
### Dies when the argument list is not made of pairs, when the requested
### type is unknown, when neither start+end nor single is given for a user
### defined style, or when start and end are equal.
sub import {
    my $package = shift;

    ### check the argument count before turning the list into a hash ###
    if( @_ % 2 ) {
        die "Incomplete set of arguments to $package\n";
    }

    my %args = @_;

    ### no arguments: take the default C comment style, also when an
    ### earlier 'use' had selected another one
    unless( keys %args ) {
        $Type = 'C';
        return;
    }

    ### the user requested one of the predefined types ###
    if( $args{type} ) {
        my $wanted = uc $args{type};

        unless( $Conf->{$wanted} ) {
            die "Requested an unsupported type $args{type} for Acme::Comment\n";
        }

        $Type = $wanted;

        ### several languages share one and the same config hash, so apply
        ### the overrides to a private copy to leave the others untouched
        if( defined $args{own_line} or defined $args{one_line} ) {
            my %private = %{ $Conf->{$Type} };

            $private{own_line} = $args{own_line} if defined $args{own_line};
            $private{one_line} = $args{one_line} if defined $args{one_line};

            $Conf->{$Type} = \%private;
        }

        return;
    }

    ### otherwise, define a new type for the user ###
    unless( (defined $args{start} and defined $args{end}) or defined $args{single} ) {
        die "You need to specify both start and end tags OR a single line comment!\n";
    }

    if( defined $args{start} and defined $args{end} and $args{start} eq $args{end} ) {
        die "Start and end tags must be different!\n";
    }

    my %style = (
        own_line    => defined $args{own_line} ? $args{own_line} : 1,
        one_line    => defined $args{one_line} ? $args{one_line} : 0,
    );

    ### user supplied tags are literal text, not regexes ###
    for my $key ( qw|start end single| ) {
        $style{$key} = quotemeta( $args{$key} ) if defined $args{$key};
    }

    ### user defined types are simply numbered ###
    $Type           = ++$TypeCount;
    $Conf->{$Type}  = \%style;

    return;
}

### Remove the comments of the currently selected style from a piece of
### source code.
###
### Takes the source as a single string. Every line that belongs to a
### comment, and every line holding only whitespace, is replaced by an
### empty line; all other lines are kept as they are, so line numbers
### stay the same. Comment markers are only recognised when nothing but
### whitespace precedes them on the line.
###
### The result is stored in $_ (as Filter::Simple requires) and returned.
### Lines are joined with "\n" without a trailing newline.
###
### Dies when a closing tag has no opening tag, or when an opening tag is
### still unclosed at the end of the text.
sub parse {
    my $str = shift;

    ### plain assignments here: the behaviour of 'my $x = ... if COND'
    ### is undefined; false values are handled by the checks below
    my $style   = $Conf->{$Type} || {};
    my $start   = $style->{start};
    my $end     = $style->{end};
    my $single  = $style->{single};

    ### the single-line pattern may hold a top level alternation (as in
    ### '(?:==)|(?:--)'), so it must be grouped to keep the anchor in front
    ### of every alternative
    my $single_re = $single ? qr/^\s*(?:$single)/ : undef;

    my( $open_re, $close_re, $inline_re );

    if( $start && $end ) {
        ### having the comments on their own line is recommended
        ### to avoid ambiguity -kane
        my $roneline = '\s*' . $end . '\s*$';
        my $loneline = '^\s*' . $start . '\s*';

        my( $ldel, $rdel ) = $style->{own_line}
                                ? ( $loneline . '$', '^' . $roneline )
                                : ( $loneline,       $roneline       );

        $open_re    = qr/$ldel/;
        $close_re   = qr/$rdel/;

        ### a complete comment, opened and closed on the same line ###
        $inline_re  = qr/$loneline.*?$roneline/;
    }

    ### number of the line being looked at ###
    my $i = 0;

    ### amount of comments currently open ###
    my $counter = 0;

    ### line number of the last found comment open ###
    my $lastopen;

    ### return value container ###
    my @return;

    for my $line ( split /\n/, $str ) {
        $i++;

        ### a single line comment ###
        if( $single_re and $line =~ $single_re ) {
            push @return, "";
            next;
        }

        ### check if we have multiline comment options ###
        if( $open_re ) {

            ### if comments on one line are allowed, see if this is one ###
            if( $style->{one_line} and $line =~ $inline_re ) {
                push @return, "";
                next;
            }

            ### an opening tag: add to the counter and mark the line number ###
            if( $line =~ $open_re ) {
                $lastopen = $i;
                $counter++;
                push @return, "";
                next;
            }

            ### a closing tag: decrease the counter; if the counter was
            ### already at zero, there's a syntax error
            if( $line =~ $close_re ) {
                unless( $counter ) {
                    die "Missing opening comment for closing comment on line $i\n";
                }
                $counter--;
                push @return, "";
                next;
            }
        }

        ### if we have a counter, we're still inside a comment, so the
        ### line is dropped; a line of just whitespace is ignored as well
        push @return, ( $counter or $line =~ /^\s*$/ ) ? "" : $line;
    }

    ### if we have a counter left after parsing all the lines we must
    ### have an opening tag (or more) that doesn't have a closing tag
    if( $counter ) {
        die "No closing bracket found for opening comment at line $lastopen\n";
    }

    ### Filter::Simple demands we return $_ ###
    $_ = join "\n", @return;

    return $_;
}

### Private accessor: hands out the reference to the table of comment
### styles itself, not a copy of it.
sub _gimme_conf {
    return $Conf;
}

### Register parse() as the source filter. Filter::Simple's 'executable'
### selector hands it the code of the using file in $_.
FILTER_ONLY(
    executable => sub { parse($_) },
);


1;

=pod

=head1 NAME

Acme::Comment

=head1 SYNOPSIS

    use Acme::Comment type=>'C++', own_line=>1;

    /*
    if (ref $mod) {
        $bar->{do}->blat(msg => 'blarg');
        eval {

    i'm sooo sick of this time for some coffee

    */

    // I prefer beer.  --sqrn

=head1 DESCRIPTION

Acme::Comment allows multi-line comments which are filtered out.
Unlike the pseudo multi-line comment C<if (0) {}>, the code being
commented out need not be syntactically valid.

=head1 USE

Acme::Comment contains several different commenting styles.

Styles may be specified by the C<type> argument, or by C<start> and
C<end> and manipulated with C<own_line> and C<one_line>.

Styles may contain multi-line comments and single-line comments.
Perl, for example, has single-line comments in the form of C<#>.

C, on the other hand, has multi-line comments which begin with
C</*> and end with C<*/>.

With multi-line comments, leaving out a begin or an end comment
will cause an error.

Both types of comments may only be preceded on a line by whitespace.

=head2 own_line

By default, C<own_line> is true, which means that multi-line comments may not
be followed by any characters other than whitespace on the same line.
This is the safest option if you think your code may contain the
comment characters (perhaps in a regex).  If you disable it, other
characters are allowed on the line after the starting delimiter, but these
characters will be ignored.  The closing delimiter cannot be followed by
any other characters.

Thus, in the following example, C<$foo> would be set to 1.

    /* This is my real comment.
    */
    $foo = 1;

If you wish to change this option, you must specify either a C<type> or
C<start> and C<end>.

=head2 one_line

By default, this is set to false, which means that multi-line comments
may not end on the same line in which they begin.  Turning this on
allows the following syntax:

    /* comment */

If you wish to change this option, you must specify either a C<type> or
C<start> and C<end>.

=head2 C<start> and C<end>

The C<start> and C<end> arguments allow you to supply your own commenting
pattern instead of one of the ones available with C<type>.  It is not
valid to provide the same pattern for both C<start> and C<end>.

You cannot specify both C<type> and C<start> and C<end>, and C<start>
and C<end> must both be provided if you provide one of them.

=head2 types

The C<type> argument specifies what language style should be used.
The name is matched case-insensitively.
Only one language style may be specified.

=over 4

=item * Ada

Single-line comments begin with C<'>.

=item * Advsys

Advsys single-line comments begin with C<;>.

=item * Alan

Single-line comments start with C<-->.

=item * Algol

Multi-line comments begin with C<'comment'> and end with C<;>.

NOTE: You should not use Algol with C<own_line> set to 0:
The source filter will take a C<;> to be an ending tag for your
comments, regardless of where it is.

=item * AWK

Single-line comments use C<#>.

=item * B

Multi-line comments use C</*> and C<*/>.

=item * Basic

Single-line comments begin with C<'>.

=item * Beta

Multi-line comments use C<(*> and C<*)>.

=item * Bliss

Multi-line comments use C<(*> and C<*)>.

=item * Blue

Single-line comments use either C<==> or C<-->.

=item * C

The default for Acme::Comment is C-style multi-line commenting
with C</*> and C<*/>.  However, if you wish to change C<one_line>
or C<own_line>, you must explicitly specify the type.

=item * C++

C++ multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * C#

C# multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * Chill

Multi-line comments use C</*> and C<*/>.

=item * Clean

Clean multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * E

Single-line comments use C<#>.

=item * Eiffel

Single-line comments start with C<-->.

=item * Elastic

Elastic multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * Focal

Single-line comments start with C<comment>.

=item * Fortran

Single-line comments use C<!>.

=item * Guile

Guile multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * Haskell

Single-line comments start with C<-->.

=item * HTML

HTML style has multi-line commenting in the form of C<E<lt>!--> and
C<--E<gt>>.

=item * Hugo

Multi-line comments begin with C<!\> and end with C<\!>.  Single-line
comments begin with C<!>; because of the similarity with multi-line
comments, a C<!> that is directly followed by C<\> is not treated as a
single-line comment.

=item * Icon

Single-line comments use C<#>.

=item * Intercal

Single-line comments are marked with C<DO NOTE THAT> and may optionally
be preceded by a line number in the following syntax:
C<(23) DO NOTE THAT>.

=item * Java

Java multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * Joy

Multi-line comments use C<(*> and C<*)>.

=item * LaTeX

Single-line comments use C<%>.

=item * LISP

LISP single-line comments begin with C<;>.

=item * Orthogonal

Orthogonal single-line comments begin with C<;>.

=item * Parrot

Single-line comments use C<#>.

=item * Pascal

Multi-line comments use C<(*> and C<*)>.

=item * Perl

Single-line comments use C<#>.

=item * PHP

PHP multi-line style uses C</*> and C<*/>.  Single-line uses C<//>.

=item * Pilot

Single-line comments in the syntax C<\/\/> are supported.

=item * PL/I

Multi-line comments use C</*> and C<*/>.

=item * PostScript

Single-line comments use C<%>.

=item * Python

Single-line comments use C<#>.

=item * Ruby

Ruby multi-line comments begin with C<=begin> and end with
C<=end>.  Single-line comments use C<#>.

=item * Scheme

Scheme single-line comments begin with C<;>.

=item * Unlambda

Single-line comments use C<#>.

=item * Var'aq

Multi-line comments use C<(*> and C<*)>.

=back

=head1 CAVEATS

Because of the way source filters work, it is not possible to eval
code containing comments and have them correctly removed.

=head1 NOTE

Some of these programming languages may be spelled incorrectly, or
may have the wrong quote characters noted.  The majority of this
information was found by searches for language specifications.

So please report errors, as well as obscure commenting syntax you
know of.

=head1 AUTHOR

This module by
Jos Boumans E<lt>kane@cpan.orgE<gt>.

=head1 Acknowledgements

Thanks to Abigail and Glenn Maciag for their suggestions.

=head1 COPYRIGHT

This module is
copyright (c) 2002 Jos Boumans E<lt>kane@cpan.orgE<gt>.
All rights reserved.

This library is free software;
you may redistribute and/or modify it under the same
terms as Perl itself.

=cut