########################################### # File::Comments -- 2005, Mike Schilli ########################################### ########################################### package File::Comments; ########################################### use strict; use warnings; use Log::Log4perl qw(:easy); use Sysadm::Install qw(:all); use File::Basename; use Module::Pluggable require => 1, #search_path => [qw(File::Comments::Plugin)], ; our $VERSION = "0.07"; ########################################### sub new { ########################################### my($class, %options) = @_; my $self = { cold_calls => 1, default_plugin => undef, suffixes => {}, bases => {}, plugins => [], %options, }; bless $self, $class; # Init plugins $self->init(); return $self; } ########################################### sub init { ########################################### my($self) = @_; $self->{plugins} = []; for($self->plugins()) { DEBUG "Initializing plugin $_"; my $plugin = $_->new(mothership => $self); push @{$self->{plugins}}, $plugin; } } ########################################### sub find_plugin { ########################################### my($self) = @_; # Is there a suffix handler defined? if(defined $self->{target}->{suffix} and exists $self->{suffixes}->{$self->{target}->{suffix}}) { DEBUG "Searching for plugin handling suffix $self->{target}->{suffix}"; for my $plugin (@{$self->{suffixes}->{$self->{target}->{suffix}}}) { DEBUG "Checking if ", ref $plugin, " is applicable for suffix ", "'$self->{target}->{suffix}'"; if($plugin->applicable($self->{target})) { DEBUG ref($plugin), " accepted"; return $plugin; } else { DEBUG ref($plugin), " rejected"; } } } # Is there a base handler defined? if(defined $self->{target}->{file_base} and exists $self->{bases}->{$self->{target}->{file_base}}) { DEBUG "Searching for plugin handling base $self->{target}->{file_base}"; for my $plugin (@{$self->{bases}->{$self->{target}->{file_base}}}) { DEBUG "Checking if ", ref $plugin, " is applicable for base ", "'$self->{target}->{file_base}'"; if($plugin->applicable($self->{target})) { DEBUG ref($plugin), " accepted"; return $plugin; } else { DEBUG ref($plugin), " rejected"; } } } # Hmm ... no volunteers yet. return undef unless $self->{cold_calls}; # Go from door to door and check if some plugin wants to # handle it. Set the 'cold_call' flag to let the plugin know # about our desparate move. for my $plugin (@{$self->{plugins}}) { DEBUG "Checking if ", ref $plugin, " is applicable for ", "file '$self->{target}->{path}' (cold call)"; if($plugin->applicable($self->{target}, 1)) { DEBUG "Cold call accepted"; return $plugin; } else { DEBUG "Cold call rejected"; } } return undef; } ########################################### sub guess_type { ########################################### my($self, $target) = @_; if(ref $target) { $self->{target} = $target; } else { $self->{target} = File::Comments::Target->new(path => $target); } my $plugin = $self->find_plugin(); if(! defined $plugin) { ERROR "No plugin found to handle $target"; return undef; } return $plugin->type(); } ########################################### sub comments { ########################################### my($self, $target) = @_; return $_[0]->dispatch($target, "comments"); } ########################################### sub stripped { ########################################### my($self, $target) = @_; return $_[0]->dispatch($target, "stripped"); } ########################################### sub dispatch { ########################################### my($self, $target, $function) = @_; if(ref $target) { $self->{target} = $target; } else { $self->{target} = File::Comments::Target->new(path => $target); } my $plugin = $self->find_plugin(); if(! defined $plugin) { if($self->{default_plugin}) { $plugin = $self->{default_plugin}; } else { ERROR "Type of $target couldn't be determined"; # Just return and empty list return undef; } } DEBUG "Calling ", ref $plugin, " to handle $self->{target}->{path}"; return $plugin->$function($self->{target}); } ########################################### sub register_suffix { ########################################### my($self, $suffix, $plugin_obj) = @_; DEBUG "Registering ", ref $plugin_obj, " as a handler for suffix $suffix"; # Could be more than one, line them up push @{$self->{suffixes}->{$suffix}}, $plugin_obj; } ########################################### sub suffix_registered { ########################################### my($self, $suffix) = @_; return exists $self->{suffixes}->{$suffix}; } ########################################### sub register_base { ########################################### my($self, $base, $plugin_obj) = @_; DEBUG "Registering ", ref $plugin_obj, " as a handler for base $base"; # Could be more than one, line them up push @{$self->{bases}->{$base}}, $plugin_obj; } ################################################## # Poor man's Class::Struct ################################################## sub make_accessor { ################################################## my($package, $name) = @_; no strict qw(refs); my $code = <{$name} = \$value; } if(exists \$self->{$name}) { return (\$self->{$name}); } else { return ""; } } EOT if(! defined *{"$package\::$name"}) { eval $code or die "$@"; } } ########################################### package File::Comments::Target; ########################################### use Sysadm::Install qw(:all); use File::Basename; use Log::Log4perl qw(:easy); ########################################### sub new { ########################################### my($class, %options) = @_; my $self = { path => undef, dir => undef, file_name => undef, file_base => undef, content => undef, suffix => undef, %options, }; bless $self, $class; $self->load($self->{path}, $self->{content}); return $self; } ########################################### sub load { ########################################### my($self, $path, $content) = @_; $self->{content} = $content unless $content; $self->{path} = $path; $self->{content} = slurp $path unless defined $self->{content}; $self->{file_name} = basename($path); $self->{dir} = dirname($path); $self->{suffix} = undef; $self->{file_base} = $self->{file_name}; if(index($self->{file_name}, ".") >= 0) { ($self->{file_base}, $self->{suffix}) = ($self->{file_name} =~ m#(.+)(\.[^.]*$)#); } DEBUG "Loaded file path=", def($path), " name=", def($self->{file_name}), " dir=", def($self->{dir}), " suffix=", def($self->{suffix}), " base=", def($self->{file_base}); } ########################################### sub def { ########################################### return $_[0] if defined $_[0]; return "**undef**"; } File::Comments::make_accessor("File::Comments::Target", $_) for qw(path file_name file_base content suffix dir); 1; __END__ =head1 NAME File::Comments - Recognizes file formats and extracts format-specific comments =head1 SYNOPSIS use File::Comments; my $snoop = File::Comments->new(); # *---------------- # | program.c: # | /* comment */ # | main () {} # *---------------- my $comments = $snoop->comments("program.c"); # => [" comment "] # *---------------- # | script.pl: # | # comment # | print "howdy!\n"; # another comment # *---------------- my $comments = $snoop->comments("script.pl"); # => [" comment", " another comment"] # or strip comments from a file: my $stripped = $snoop->stripped("script.pl"); # => "print "howdy!\n";" # or just guess a file's type: my $type = $snoop->guess_type("program.c"); # => "c" =head1 DESCRIPTION File::Comments guesses the type of a given file, determines the format used for comments, extracts all comments, and returns them as a reference to an array of chunks. Alternatively, it strips all comments from a file. Currently supported are Perl scripts, C/C++ programs, Java, makefiles, JavaScript, Python and PHP. The plugin architecture used by File::Comments makes it easy to add new formats. To support a new format, a new plugin module has to be installed. No modifications to the File::Comments codebase are necessary, new plugins will be picked up automatically. File::Comments can also be used to simply guess a file's type. It it somewhat more flexible than File::MMagic and File::Type. File types in File::Comments are typically based on file name suffixes (*.c, *.pl, etc.). If no suffix is available, or a given suffix is ambiguous (e.g. if several plugins have registered a handler for the same suffix), then the file's content is used to narrow down the possibilities and arrive at a decision. WARNING: THIS MODULE IS UNDER DEVELOPMENT, QUALITY IS ALPHA. IF YOU FIND BUGS, OR WANT TO CONTRIBUTE PLUGINS, PLEASE SEND THEM MY WAY. =head2 FILE TYPES Currently, the following plugins are included in the File::Comments distribution: ############################################### # plugin type # ############################################### File::Comments::Plugin::C c (o) File::Comments::Plugin::Makefile makefile (X) File::Comments::Plugin::Perl perl (X) File::Comments::Plugin::JavaScript js (o) File::Comments::Plugin::Java java (o) File::Comments::Plugin::HTML html (X) File::Comments::Plugin::Python python (o) File::Comments::Plugin::PHP php (o) (X) Fully implemented (o) Implemented with regular expressions, only works for easy cases until real parsers are employed. The constants listed in the I column are the strings returned by the C method. =head1 Methods =over 4 =item $snoop = File::Comments-Enew() Create a new comment extractor engine. This will automatically initialize all plugins. To avoid cold calls (L), set C to a false value (defaults to 1): $snoop = File::Comments->new( cold_calls => 0 ); By default, if no plugin can be found for a given file, C will throw a fatal error and C. If this is undesirable and a default plugin should be used instead, it can be specified in the constructor using the C parameter: $snoop = File::Comments->new( default_plugin => "File::Comments::Plugin::Makefile" ); =item $comments = $snoop-Ecomments("program.c"); Extract all comments from a file. After determining the file type by either suffix or content (L), comments are extracted as chunks and returned as a reference to an array. To get a single string containing all comments, just join the chunks: my $comments_string = join '', @$comments; =item $stripped_text = $snoop-Estripped("program.c"); Strip all comments from a file. After determining the file type by either suffix or content (L), all comments are removed and the stripped text is returned in a scalar. =item $type = $snoop-Eguess_type("script.pl") Guess the type of a file, based on either suffix, or in absense of a suffix via L. Return the result as a string: C<"c">, C<"makefile">, C<"perl">, etc. (L). =item $snoop->suffix_registered("c") Returns true if one of the plugins has registered the given suffix. =back =head2 Writing new plugins Writing a new plugin to add functionality to the File::Comments framework is as simple as defining a new module, derived from the baseclass of all plugins, C. Three additional methods are needed: C, C, and C. C gets called when the mothership finds the plugin and initializes it. This is the time to register extensions that the plugin wants to handle. The second mandatory method for a plugin is C, which returns a string, indicating the type of the file examined. Usually this can be done without further ado, since a basic plugin will called only on files which it registered for by suffix. Exceptions to this are explained later. The third method is C, which returns a reference to an array of comment lines. The content of the source file to be examined will be available in $self->{target}->{content} by the time C gets called. And that's it. Here's a functional basic plugin, registering a new suffix ".odd" with the mothership and expecting files with comment lines that start with C: ########################################### package File::Comments::Plugin::Oddball; ########################################### use strict; use warnings; use File::Comments::Plugin; our $VERSION = "0.01"; our @ISA = qw(File::Comments::Plugin); ########################################### sub init { ########################################### my($self) = @_; $self->register_suffix(".odd"); } ########################################### sub type { ########################################### my($self) = @_; return "odd"; } ########################################### sub comments { ########################################### my($self) = @_; # Some code to extract all comments from # $self->{target}->{content}: my @comments = ($self->{target}->{content} =~ /^ODDCOMMENT:(.*)/); return \@comments; } 1; =head2 Cold Calls If a file doesn't have an extension or an extensions that's served by multiple plugins, File::Comments will go shop around and ask all plugins if they want to handle the file. The mothership calls each plugin's C method, passing it an object of type C, which contains the following fields: When the plugin gets such a I (indicated by the third parameter to C, it can either accept or deny the request. To arrive at a decision, it can peek into the target object. The Perl plugin illustrates this: ########################################### sub applicable { ########################################### my($self, $target, $cold_call) = @_; return 1 unless $cold_call; return 1 if $target->{content} =~ /^#!.*perl\b/; return 0; } If a plugin does not define a C method, a default method is inherited from the base class C, which looks like this: ########################################### sub applicable { ########################################### my($self, $target, $cold_call) = @_; return 0 if $cold_call; return 1; } This will deny all I and only accept requests for files with suffixes or base names the plugin has already signed up for. =head2 Plugin Inheritance Plugins can reuse existing plugins by inheritance. For example, if you wanted to write a I plugin that takes over all cold calls and handles comments like the C plugin, you can simply use ########################################### package File::Comments::Plugin::Catchall; ########################################### use strict; use warnings; use File::Comments::Plugin; use File::Comments::Plugin::Makefile; our $VERSION = "0.01"; our @ISA = qw(File::Comments::Plugin::Makefile); ########################################### sub applicable { ########################################### my($self) = @_; return 1; } C just implements C and inherits everything else from C. =head1 LEGALESE Copyright 2005 by Mike Schilli, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR 2005, Mike Schilli