# # Courier::Filter::Module::Parts class # # (C) 2003-2008 Julian Mehnle # $Id: Parts.pm 210 2008-03-21 19:30:31Z julian $ # ############################################################################### =head1 NAME Courier::Filter::Module::Parts - Message (MIME multipart and ZIP archive) parts filter module for the Courier::Filter framework =cut package Courier::Filter::Module::Parts; use warnings; use strict; use base 'Courier::Filter::Module'; use MIME::Parser 5.4; use IO::InnerFile 2.110; # Require either MIME::Parser 5.413 or lower, or IO::InnerFile 2.110+ # (where IO::InnerFile::seek() properly returns TRUE when appropriate). use Digest::MD5; use File::Spec; # In-memory processing doesn't work, see comments in match_mime_part(). use constant TRUE => (0 == 0); use constant FALSE => not TRUE; use constant default_response => 'Prohibited message part detected.'; =head1 SYNOPSIS use Courier::Filter::Module::Parts; my $module = Courier::Filter::Module::Parts->new( max_message_size => $max_message_size, max_part_size => $max_part_size, views => ['raw', 'zip'], signatures => [ { # One or more of the following options: mime_type => 'text/html' || qr/html/i, file_name => 'file_name.ext' || qr/\.(com|exe)$/i, size => 106496, digest_md5 => 'b09e26c292759d654633d3c8ed00d18d', encrypted => 0, # Optionally any of the following: views => ['raw', 'zip'], response => $response_text }, ... ], logger => $logger, inverse => 0, trusting => 0, testing => 0, debugging => 0 ); my $filter = Courier::Filter->new( ... modules => [ $module ], ... ); =head1 DESCRIPTION This class is a filter module class for use with Courier::Filter. It matches a message if one of the message's parts (MIME parts, or files in a ZIP archive) matches one of the configured signatures. =cut # Implementation: ############################################################################### =head2 Constructor The following constructor is provided: =over =item B: returns I Creates a new B filter module. %options is a list of key/value pairs representing any of the following options: =over =item B An arrayref containing the global default set of I the filter module should apply to message parts when matching the configured signatures against them. A view is the way how a MIME part's (MIME-decoded) data is interpreted. Defaults to B<['raw']>. The following views are supported: =over =item B The MIME part is MIME-decoded but not otherwise transformed. The raw MIME part is then matched against the configured signatures. =item B If the MIME part has a file name ending in C<.zip>, it is considered a ZIP archive, and all unencrypted files in the archive are matched as individual message parts against the configured signatures. The zip view requires the B Perl module to be installed. =back =item B =item B (DEPRECATED) An integer value controlling the maximum size (in bytes) of the overall message text for a message to be processed by this filter module. Messages larger than this value will never be processed, and thus will never match. If B, there is no size limit. Defaults to B<1024**2> (1MB). As MIME multipart and ZIP archive processing can be quite CPU- and memory-intensive (although the B filter module makes use of temporary files since version 0.13), you should definitely restrict the message size to some sensible value that easily fits in your server's memory. 1024**2 (1MB) should be appropriate for most uses of this filter module. The C option was previously called C, but the latter is now deprecated and may not be supported in future versions of the B filter module. =item B An integer value controlling the maximum size (in bytes) of any single message part (i.e. MIME part in a message, or file in an archive) for that part to be processed by this filter module. Parts larger than this value will never be processed, and thus will never match. If B, there is no size limit. Defaults to the value of the C option, so you don't really need to specify a part size limit if you are comfortable with using the same value for both. See the C option for its default. If you make use of the B<'zip'> view, be aware of the risk posed by so-called I, which allow messages to easily fall below the overall message size limit, while a file in a small attached ZIP archive can decompress to a huge size. The part size limit prevents huge files from being decompressed. =item B I. A reference to an array containing the list of I against which message parts are to be matched. A signature in turn is a reference to a hash containing one or more so-called signature I (as key/value pairs) and any signature I (also as key/value pairs). I Aspects may either be scalar values (for exact, case-sensitive matches), or regular expression objects created with the C operator (for inexact, partial matches). For a signature to match a message part, I of the signature's specified aspects must match those of the message part. For the filter module to match a message, I of the signatures must match I of the message's parts. A signature aspect can be any of the following: =over =item B The MIME type of the message part ('type/sub-type'). =item B The file name of the message part. =item B The exact size (in bytes) of the decoded message part. =item B The MD5 digest of the decoded message part (32 hex digits, as printed by `md5sum`). =item B A boolean value denoting whether the message part is encrypted and its contents are inaccessible to the B filter module. =back I A signature option can be any of the following: =over =item B An arrayref containing the set of I the filter module should apply to message parts when matching I signature against them. For a list of supported views, see the description of the constructor's C option. Defaults to the global set of views specified to the constructor. =item B A string that is to be returned as the match result in case of a match. Defaults to B<"Prohibited message part detected.">. =back I So for instance, a signature list could look like this: signatures => [ { mime_type => qr/html/i, response => 'No HTML mail, please.' }, { file_name => qr/\.(com|exe|lnk|pif|scr|vbs)$/i, response => 'Executable content detected' }, { size => 106496, digest_md5 => 'b09e26c292759d654633d3c8ed00d18d', views => ['raw', 'zip'], # Look into ZIP archives, too! response => 'Worm detected: W32.Swen' }, { size => 22528, # Cannot set a specific digest_md5 since W32.Mydoom # is polymorphic. response => 'Worm suspected: W32.Mydoom' }, { encrypted => 1, views => ['zip'], response => 'Worm suspected ' . '(only worms and fools use ZIP encryption)' } ] =back All options of the B constructor are also supported by the constructor of the B filter module. Please see L for their descriptions. =cut sub new { my ($class, %options) = @_; my $mime_parser = MIME::Parser->new(); #$mime_parser->output_to_core(TRUE); # In-memory processing doesn't work, see comments in match_mime_part(). $mime_parser->output_under(File::Spec->tmpdir); #$mime_parser->tmp_to_core(TRUE); # In-memory processing doesn't work, see comments in match_mime_part(). $mime_parser->use_inner_files(TRUE); my $self = $class->SUPER::new( %options, mime_parser => $mime_parser ); # Default "max_message_size" option to the deprecated "max_size" option, # or to 1024**2 (1MB): $self->{max_message_size} = ( exists($self->{max_size}) ? $self->{max_size} : 1024**2 ) if not exists($self->{max_message_size}); # Default "max_part_size" option to the "max_message_size" option: $self->{max_part_size} = $self->{max_message_size} if not exists($self->{max_part_size}); # Default "views" option to 'raw': my $views = $self->{views} || { 'raw' => TRUE }; # Transform "views" option into hashref if it was given as an arrayref: $views = { map(($_ => TRUE), @$views) } if ref($views) eq 'ARRAY'; my $used_views = { %$views }; foreach my $signature ( @{$self->{signatures}} ) { # Default "views" option to global "views" option: my $signature_views = $signature->{views} || $views; # Transform "views" option into hashref if it was given as an arrayref: $signature_views = { map(($_ => TRUE), @$signature_views) } if ref($signature_views) eq 'ARRAY'; # Add any signature-specific views to the global set of used views: %$used_views = (%$used_views, %$signature_views); $signature->{views} = $signature_views; $self->compile_signature($signature); } $self->{used_views} = $used_views; return $self; } =back =head2 Instance methods See L for a description of the provided instance methods. =cut sub match { my ($self, $message) = @_; return undef if defined($self->{max_message_size}) and -s $message->file_name > $self->{max_message_size}; #my $text = $message->text; #my $part = $self->{mime_parser}->parse_data($text); # In-memory processing doesn't work, see comments in match_mime_part(). my $part = $self->{mime_parser}->parse_open($message->file_name); my ($result, @code) = $self->match_mime_part($part); $result &&= 'Parts: ' . $result; $self->{mime_parser}->filer->purge(); # In-memory processing doesn't work, see comments in match_mime_part(). rmdir($self->{mime_parser}->filer->output_dir); #if MIME::Tools->VERSION < 6.0; # Purging also doesn't work properly # (bug filed: ). return ($result, @code); } sub match_mime_part { my ($self, $part) = @_; if (my $body = $part->bodyhandle) { # No sub-parts, match this part. #my $handle = $body->open('r'); # In-memory processing doesn't work because MIME::Body::open() # doesn't provide a fully-IO::Handle-compatible I/O handle object # (opened() method is missing, no bug filed). Working around that # by alternatively creating a Perl 5.8 style in-memory file # object... # my $body_as_string = $body->as_string; # open(my $handle, '+<', \$body_as_string); # ...doesn't work either because Archive::Zip::_isSeekable() is # broken (erroneously considers Perl 5.8 style in-memory IO::File # objects not to be seekable, # bug filed: ). # All of this forces us to make MIME::Parser use temporary files # instead of doing everything exclusively in-memory. Aaargh!! # First, we gather signature makers for all possible (and enabled) # views of the MIME part, then we actually test each view in turn # against the configured test signatures. my @views; # Raw view (the MIME part itself) (if enabled): my $rawsig = $self->make_signature_from_mime_part($part); if ($self->{used_views}->{'raw'}) { push( @views, { name => 'raw', sig_maker => sub { $rawsig } } ) if not defined($self->{max_part_size}) or $rawsig->{size} <= $self->{max_part_size}; } # ZIP archive members view (if enabled and MIME part is a ZIP archive): if ( $self->{used_views}->{'zip'} and defined($rawsig->{file_name}) and $rawsig->{file_name} =~ /\.zip$/i ) { require Archive::Zip; my $archive = Archive::Zip->new(); #$archive->readFromFileHandle($handle); # In-memory processing doesn't work, see above. $archive->read($body->path); # Make a view for each archive member: foreach my $member ($archive->members) { push( @views, { name => 'zip', sig_maker => sub { $self->make_signature_from_zip_archive_member($member) } } ) if not defined($self->{max_part_size}) or $member->uncompressedSize <= $self->{max_part_size}; } } # Now, for each view, try matching the configured signatures: foreach my $view (@views) { # Make signature from data view: my $datasig = $view->{sig_maker}->(); # Test that signature against the configured signatures: foreach my $signature ( @{$self->{signatures}} ) { # Skip this signature if it doesn't apply to the current view: next if not $signature->{views}->{ $view->{name} }; my ($result, @code) = $signature->{matcher}->($datasig); return ($result, @code) if defined($result); } } } else { # Match all sub-parts: foreach my $subpart ($part->parts) { my ($result, @code) = $self->match_mime_part($subpart); return ($result, @code) if defined($result); } } return undef; } sub make_signature_from_mime_part { my ($self, $part) = @_; my $head = $part->head; my $body = $part->bodyhandle; my $text = $body->as_string; return { mime_type => $head->mime_type, file_name => $head->recommended_filename, size => length($text), digest_md5 => Digest::MD5::md5_hex($text), encrypted => FALSE }; } sub make_signature_from_zip_archive_member { my ($self, $member) = @_; return { mime_type => undef, file_name => $member->fileName, size => $member->uncompressedSize, digest_md5 => $member->isEncrypted ? undef : Digest::MD5::md5_hex(scalar($member->contents)), encrypted => $member->isEncrypted }; } sub compile_signature { my ($self, $signature) = @_; my %matchers; my @aspects = grep(!/^(?:response|views)$/, keys(%$signature)); foreach my $aspect (@aspects) { my $pattern = $signature->{$aspect}; my $matcher; if (ref($pattern) eq 'Regexp') { $matcher = sub { $_[0] =~ $pattern }; } elsif (ref($pattern) eq 'CODE') { $matcher = $pattern; } else { if ($aspect =~ /^(?:encrypted)$/) { # Aspect is of boolean type: $matcher = sub { not ($_[0] xor $pattern) }; } else { $matcher = sub { $_[0] eq $pattern }; } } $matchers{$aspect} = $matcher; } my @response = ref($signature->{response}) eq 'ARRAY' ? @{ $signature->{response} } : ($signature->{response} || $self->default_response); my $matcher = sub { # Closure with regard to %matchers. my ($signature) = @_; foreach my $aspect (keys(%matchers)) { my $value = $signature->{$aspect}; return undef if not defined($value) or not $matchers{$aspect}->($value); } return @response; }; $signature->{matcher} = $matcher; return; } =head1 SEE ALSO L, L. For AVAILABILITY, SUPPORT, and LICENSE information, see L. =head1 AUTHOR Julian Mehnle =cut TRUE;