package CGI::IDS::Whitelist; our $VERSION = '1.0214'; #------------------------- Notes ----------------------------------------------- # This source code is documented in both POD and ROBODoc format. # Please find additional POD documentation at the end of this file # (search for "__END__"). #------------------------------------------------------------------------------- #****c* IDS::Whitelist # NAME # PerlIDS Whitelist (CGI::IDS::Whitelist) # DESCRIPTION # Whitelist Processor for PerlIDS (CGI::IDS) # AUTHOR # Hinnerk Altenburg # CREATION DATE # 2010-03-29 # COPYRIGHT # Copyright (C) 2010-2011 Hinnerk Altenburg # # This file is part of PerlIDS. # # PerlIDS is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # PerlIDS is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with PerlIDS. If not, see . #**** =head1 NAME CGI::IDS::Whitelist - Whitelist Processor for PerlIDS - Perl Website Intrusion Detection System (XSS, CSRF, SQLI, LFI etc.) =head1 DESCRIPTION Whitelist Processor for PerlIDS (L). Performs a basic string check and the whitelist check. See section L for details on setting up a whitelist file. CGI::IDS::Whitelist may also be used standalone without CGI::IDS to check whether a request has suspicious parameters at all before handing it over to CGI::IDS. This may be the case if you let worker servers do the more expensive CGI::IDS job and only want to send over the requests that have suspicious parameters. See L for an example. =head1 SYNOPSIS use CGI; use CGI::IDS::Whitelist; $query = new CGI; my $whitelist = CGI::IDS::Whitelist->new( whitelist_file => '/home/hinnerk/sandbox/ids/cgi-bin/param_whitelist.xml', ); my @request_keys = keys %$query->Vars; foreach my $key (@request_keys) { if ( $whitelist->is_suspicious(key => $key, request => $query->Vars ) { send_to_ids_worker_server( $query->Vars ); last; } } =head1 METHODS =cut #------------------------- Pragmas --------------------------------------------- use strict; use warnings; #------------------------- Libs ------------------------------------------------ use XML::Simple qw(:strict); use Carp; use JSON::XS; use Encode; #------------------------- Subs ------------------------------------------------ #****m* IDS/new # NAME # Constructor # DESCRIPTION # Creates a Whitelist object. # The whitelist will stay loaded during the lifetime of the object. # You may call is_suspicious() multiple times, the collecting debug # arrays suspicious_keys() and non_suspicious_keys() will only be # emptied by an explizit reset() call. # INPUT # HASH # whitelist_file STRING The path to the whitelist XML file # OUTPUT # Whitelist object, dies (croaks) if a whitelist parsing error occurs. # EXAMPLE # # instantiate object # my $whitelist = CGI::IDS::Whitelist->new( # whitelist_file => '/home/hinnerk/sandbox/ids/cgi-bin/param_whitelist.xml', # ); # # instantiate object without a whitelist, just performs a basic string check # my $whitelist = CGI::IDS::Whitelist->new(); #**** =head2 new() Constructor. Can optionally take the path to a whitelist file. If I is not given, just a basic string check will be performed. The whitelist will stay loaded during the lifetime of the object. You may call C multiple times, the collecting debug arrays C and C will only be emptied by an explizit C call. For example, the following are valid constructors: my $whitelist = CGI::IDS::Whitelist->new( whitelist_file => '/home/hinnerk/sandbox/ids/cgi-bin/param_whitelist.xml', ); my $whitelist = CGI::IDS::Whitelist->new(); The Constructor dies (croaks) if a whitelist parsing error occurs. =cut sub new { my ($package, %args) = @_; # self member variables my $self = { whitelist_file => $args{whitelist_file}, suspicious_keys => [], non_suspicious_keys => [], }; # create object bless $self, $package; # read & parse XML $self->_load_whitelist_from_xml($self->{whitelist_file}); return $self; } #****m* IDS/Whitelist/is_suspicious # NAME # is_suspicious # DESCRIPTION # Performs the whitelist check for a given request parameter. # INPUT # HASHREF # + key The key of the request parameter to be checked # + request HASHREF to the complete request (for whitelist conditions check) # OUTPUT # 1 if you should check it with the complete filter set, # 0 if harmless or sucessfully whitelisted. # SYNOPSIS # $whitelist->is_suspicious( key => 'mykey', request => $request ); #**** =head2 is_suspicious() DESCRIPTION Performs the whitelist check for a given request parameter. INPUT HASHREF + key The key of the request parameter to be checked + request HASHREF to the complete request (for whitelist conditions check) OUTPUT 1 if you should check it with the complete filter set, 0 if harmless or sucessfully whitelisted. SYNOPSIS $whitelist->is_suspicious( key => 'mykey', request => $request ); =cut sub is_suspicious { my ($self, %args) = @_; my $key = $args{key}; my $request = $args{request}; my $request_value = $args{request}->{$key}; my $contains_encoding = 0; # skip if value is empty or generally whitelisted if ( $request_value ne '' && !( $self->{whitelist}{$key} && !defined($self->{whitelist}{$key}->{rule}) && !defined($self->{whitelist}{$key}->{conditions}) && !defined($self->{whitelist}{$key}->{encoding}) ) ) { my $request_value_orig = $request_value; $request_value = $self->convert_if_marked_encoded(key => $key, value => $request_value); if ($request_value ne $request_value_orig) { $contains_encoding = 1; } $request_value = $self->make_utf_8($request_value); # scan only if value is not harmless if ( !$self->is_harmless_string($request_value) ) { my $attacks = {}; if (!$self->{whitelist}{$key}) { # apply filters to value, not in whitelist push (@{$self->{suspicious_keys}}, {key => $key, value => $request_value, reason => 'key'}); # key not whitelisted return 1; } else { # check if all conditions match my $condition_mismatch = 0; foreach my $condition (@{$self->{whitelist}{$key}->{conditions}}) { if (! defined($request->{$condition->{key}}) || ( defined ($condition->{rule}) && $request->{$condition->{key}} !~ $condition->{rule} ) ) { $condition_mismatch = 1; } } # Apply filters if key is not in whitelisted environment conditions # or if the value does not match the whitelist rule if one is set. # Filtering is skipped if no rule is set. if ( $condition_mismatch || (defined($self->{whitelist}{$key}->{rule}) && $request_value !~ $self->{whitelist}{$key}->{rule}) || $contains_encoding ) { # apply filters to value, whitelist rules mismatched my $reason = ''; if ($condition_mismatch) { $reason = 'cond'; # condition mismatch } elsif (!$contains_encoding) { $reason = 'rule'; # rule mismatch } else { $reason = 'enc'; # contains encoding } push (@{$self->{suspicious_keys}}, {key => $key, value => $request_value, reason => $reason}); return 1; } else { # skipped, whitelist rule matched push (@{$self->{non_suspicious_keys}}, {key => $key, value => $request_value, reason => 'r&c'}); # rule & conditions matched } } } else { # skipped, harmless string push (@{$self->{non_suspicious_keys}}, {key => $key, value => $request_value, reason => 'harml'}); # harmless } } else { # skipped, empty value or key generally whitelisted my $reason = $request_value ? 'key' : 'empty'; push (@{$self->{non_suspicious_keys}}, {key => $key, value => $request_value, reason => $reason}); } return 0; } #****m* IDS/Whitelist/convert_if_marked_encoded # NAME # convert_if_marked_encoded # DESCRIPTION # Tries to JSON-decode and flatten a value to a plain string if the key has been marked as JSON in the whitelist. # Other encodings may follow in future. # INPUT # HASHREF # + key # + value # OUTPUT # The JSON-decoded and flattened 'value' if key is marked JSON. Plain keys and values, newline separated. # Untouched 'value' otherwise. # SYNOPSIS # $whitelist->convert_if_marked_encoded( key => 'data', value = '{"a":"b","c":["123", 111, "456"]}'); #**** =head2 convert_if_marked_encoded() DESCRIPTION Tries to JSON-decode and flatten a value to a plain string if the key has been marked as JSON in the whitelist. Other encodings may follow in future. INPUT HASHREF + key + value OUTPUT The JSON-decoded and flattened 'value' if key is marked JSON. Plain keys and values, newline separated. Untouched 'value' otherwise. SYNOPSIS $whitelist->convert_if_marked_encoded( key => 'data', value => '{"a":"b","c":["123", 111, "456"]}'); =cut sub convert_if_marked_encoded { my ($self, %args) = @_; my $key = $args{key}; my $request_value = $args{value}; # If marked as JSON, try to convert from JSON to reduce false positives if (defined($self->{whitelist}{$key}) && defined($self->{whitelist}{$key}->{encoding}) && $self->{whitelist}{$key}->{encoding} eq 'json') { $request_value = _json_to_string($request_value); } return $request_value; } #****m* IDS/Whitelist/suspicious_keys # NAME # suspicious_keys # DESCRIPTION # Returns the set of filters that are suspicious # Keys are listed from the last reset() or Whitelist->new() # INPUT # none # OUTPUT # [ { 'value' => , 'reason' => , 'key' => }, { ... } ] # SYNOPSIS # $whitelist->suspicious_keys(); #**** =head2 suspicious_keys() DESCRIPTION Returns the set of filters that are suspicious Keys are listed from the last reset() or Whitelist->new() INPUT none OUTPUT [ { 'value' => , 'reason' => , 'key' => }, { ... } ] SYNOPSIS $whitelist->suspicious_keys(); =cut sub suspicious_keys { my ($self) = @_; return $self->{suspicious_keys}; } #****m* IDS/Whitelist/non_suspicious_keys # NAME # non_suspicious_keys # DESCRIPTION # Returns the set of filters that have been checked but are not suspicious # Keys are listed from the last reset() or Whitelist->new() # INPUT # none # OUTPUT # [ { 'value' => , 'reason' => , 'key' => }, { ... } ] # SYNOPSIS # $whitelist->non_suspicious_keys(); #**** =head2 non_suspicious_keys() DESCRIPTION Returns the set of filters that have been checked but are not suspicious Keys are listed from the last reset() or Whitelist->new() INPUT none OUTPUT [ { 'value' => , 'reason' => , 'key' => }, { ... } ] SYNOPSIS $whitelist->non_suspicious_keys(); =cut sub non_suspicious_keys { my ($self) = @_; return $self->{non_suspicious_keys}; } #****m* IDS/Whitelist/reset # NAME # reset # DESCRIPTION # resets the member variables suspicious_keys and non_suspicious_keys to [] # INPUT # none # OUTPUT # none # SYNOPSIS # $whitelist->reset(); #**** =head2 reset() DESCRIPTION resets the member variables suspicious_keys and non_suspicious_keys to [] INPUT none OUTPUT none SYNOPSIS $whitelist->reset(); =cut sub reset { my ($self) = @_; $self->{suspicious_keys} = []; $self->{non_suspicious_keys} = []; } #****f* IDS/Whitelist/is_harmless_string # NAME # is_harmless_string # DESCRIPTION # Performs a basic regexp check for harmless characters # INPUT # + string # OUTPUT # BOOLEAN (pattern match return value) # SYNOPSIS # $whitelist->is_harmless_string( $string ); #**** =head2 is_harmless_string() DESCRIPTION Performs a basic regexp check for harmless characters INPUT + string OUTPUT BOOLEAN (pattern match return value) SYNOPSIS $whitelist->is_harmless_string( $string ); =cut sub is_harmless_string { my ($self, $string) = @_; $string = $self->make_utf_8($string); return ( $string !~ m/[^\w\s\/@!?\.]+|(?:\.\/)|(?:@@\w+)/ ); } #****f* IDS/Whitelist/make_utf_8 # NAME # make_utf_8 # DESCRIPTION # Encodes string to UTF-8 and strips malformed UTF-8 characters # INPUT # + string # OUTPUT # UTF-8 string # SYNOPSIS # $whitelist->make_utf_8( $string ); #**** =head2 make_utf_8() DESCRIPTION Encodes string to UTF-8 and strips malformed UTF-8 characters INPUT + string OUTPUT UTF-8 string SYNOPSIS $whitelist->make_utf_8( $string ); =cut sub make_utf_8 { my ($self, $string) = @_; # make string UTF-8 my $utf8_encoded = ''; eval { $utf8_encoded = Encode::encode('UTF-8', $string, Encode::FB_CROAK); }; if ($@) { # sanitize malformed UTF-8 $utf8_encoded = ''; my @chars = split(//, $string); foreach my $char (@chars) { my $utf_8_char = eval { Encode::encode('UTF-8', $char, Encode::FB_CROAK) } or next; $utf8_encoded .= $utf_8_char; } } return $utf8_encoded; } #****im* IDS/Whitelist/_load_whitelist_from_xml # NAME # _load_whitelist_from_xml # DESCRIPTION # loads the parameter whitelist XML file # croaks if a xml or regexp parsing error occors # INPUT # whitelistfile path + name of the XML whitelist file # OUTPUT # int number of loaded rules # SYNOPSIS # $self->_load_whitelist_from_xml('/home/xyz/param_whitelist.xml'); #**** sub _load_whitelist_from_xml { my ($self, $whitelistfile) = @_; my $whitelistcnt = 0; if ($whitelistfile) { # read & parse whitelist XML my $whitelistxml; eval { $whitelistxml = XMLin($whitelistfile, forcearray => [ qw(whitelist param conditions condition)], keyattr => [], ); }; if ($@) { croak "Error in _load_whitelist_from_xml while parsing $whitelistfile: $@"; } # convert XML structure into handy data structure foreach my $whitelistobj (@{$whitelistxml->{param}}) { my @conditionslist = (); foreach my $condition (@{$whitelistobj->{conditions}[0]{condition}}) { if (defined($condition->{rule})) { # copy for error message my $rule = $condition->{rule}; eval { $condition->{rule} = qr/$condition->{rule}/ms; }; if ($@) { croak 'Error in whitelist rule of condition "' . $condition->{key} . '" for param "' . $whitelistobj->{key} . '": ' . $rule . ' Message: ' . $@; } } push(@conditionslist, $condition); } my %whitelisthash = (); if (defined($whitelistobj->{rule})) { eval { $whitelisthash{rule} = qr/$whitelistobj->{rule}/ms; }; if ($@) { croak 'Error in whitelist rule for param "' . $whitelistobj->{key} . '": ' . $whitelistobj->{rule} . ' Message: ' . $@; } } if (@conditionslist) { $whitelisthash{conditions} = \@conditionslist; } if ($whitelistobj->{encoding}) { $whitelisthash{encoding} = $whitelistobj->{encoding}; } $self->{whitelist}{$whitelistobj->{key}} = \%whitelisthash; $whitelistcnt++; } } return $whitelistcnt; } #****if* IDS/Whitelist/_json_to_string # NAME # _json_to_string # DESCRIPTION # Tries to decode a string from JSON. Uses _datastructure_to_string(). # INPUT # value the string to convert # OUTPUT # value converted string if correct JSON, the unchanged input string otherwise # SYNOPSIS # IDS::Whitelist::_json_to_string($value); #**** sub _json_to_string { my ($value) = @_; my $json_ds; eval { $json_ds = JSON::XS::decode_json($value); }; if (!$@) { $value = _datastructure_to_string($json_ds)."\n"; } return $value; } #****if* IDS/Whitelist/_datastructure_to_string # NAME # _datastructure_to_string # DESCRIPTION # Walks recursively through array or hash and concatenates keys and values to one single string (\n separated) # INPUT # ref the array/hash to convert # OUTPUT # string converted string # SYNOPSIS # IDS::Whitelist::_datastructure_to_string($ref); #**** sub _datastructure_to_string { my $in = shift; my $out = ''; if (ref $in eq 'HASH') { foreach (keys %$in) { $out .= $_."\n"; $out .= _datastructure_to_string($in->{$_}); } } elsif (ref $in eq 'ARRAY') { foreach (@$in) { $out = _datastructure_to_string($_) . $out; } } else { $out .= $in."\n"; } return $out; } 1; __END__ =head1 BUGS & SUPPORT see L and L =head1 AUTHOR Hinnerk Altenburg, C<< >> =head1 SEE ALSO L =head1 COPYRIGHT & LICENSE Copyright (C) 2011 Hinnerk Altenburg (L) This file is part of PerlIDS. PerlIDS is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. PerlIDS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with PerlIDS. If not, see . =cut