unicode results from the CPAN

CGI-IDS


=cut

#------------------------- Pragmas ---------------------------------------------
use strict;
use warnings;

#------------------------- Libs ------------------------------------------------
use XML::Simple qw(:strict);
use HTML::Entities;
use MIME::Base64;
use Encode qw(decode);
use Carp;
use Time::HiRes;
use FindBin qw($Bin);
use CGI::IDS::Whitelist;

#------------------------- Settings --------------------------------------------
$XML::Simple::PREFERRED_PARSER  = "XML::Parser";

#------------------------- Debugging -------------------------------------------
# debug modes (binary):
use constant DEBUG_KEY_VALUES       => (1 << 0); # print each key value pair
use constant DEBUG_IMPACTS          => (1 << 1); # print impact per key value pair
use constant DEBUG_ARRAY_INFO       => (1 << 2); # print attack info arrays
use constant DEBUG_CONVERTERS       => (1 << 3); # print output of each converter
use constant DEBUG_SORT_KEYS_NUM    => (1 << 4); # sort request by keys numerically
use constant DEBUG_SORT_KEYS_ALPHA  => (1 << 5); # sort request by keys alphabetically
use constant DEBUG_WHITELIST        => (1 << 6); # dumps loaded whitelist hash
use constant DEBUG_MATCHED_FILTERS  => (1 << 7); # print IDs of matched filters

#use constant DEBUG_MODE                =>  DEBUG_KEY_VALUES |
#                                       DEBUG_IMPACTS |
#                                       DEBUG_WHITELIST |
#                                       DEBUG_ARRAY_INFO |
#                                       DEBUG_CONVERTERS |
#                                       DEBUG_MATCHED_FILTERS |
#                                       DEBUG_SORT_KEYS_NUM;

# simply comment this line out to switch debugging mode on (also uncomment above declaration)
use constant DEBUG_MODE             => 0;

#------------------------- Constants -------------------------------------------

# converter functions, processed in this order
my @CONVERTERS = qw/
    stripslashes
    _convert_from_repetition
    _convert_from_commented
    _convert_from_whitespace
    _convert_from_js_charcode
    _convert_js_regex_modifiers
    _convert_entities
    _convert_quotes
    _convert_from_sql_hex
    _convert_from_sql_keywords
    _convert_from_control_chars
    _convert_from_nested_base64
    _convert_from_out_of_range_chars
    _convert_from_xml
    _convert_from_js_unicode
    _convert_from_utf7
    _convert_from_concatenated
    _convert_from_proprietary_encodings
    _run_centrifuge
/;

#------------------------- Subs ------------------------------------------------

#****m* IDS/new
# NAME
#   Constructor
# DESCRIPTION
#   Creates an IDS object.
#   The filter set and whitelist will stay loaded during the lifetime of the object.
#   You may call detect_attacks() multiple times, the attack array ( get_attacks() )
#   will be emptied at the start of each run of detect_attacks().
# INPUT
#   HASH
#     filters_file    STRING  The path to the filters XML file (defaults to shipped IDS.xml)
#     whitelist_file  STRING  The path to the whitelist XML file
#     scan_keys       INT     1 to scan also the keys, 0 if not (default: 0)
#     disable_filters ARRAYREF[INT,INT,...] if given, these filter ids will be disabled
# OUTPUT
#   IDS object, dies (croaks) if no filter rule could be loaded
# EXAMPLE
#   # instantiate object; do not scan keys, values only
#   my $ids = new CGI::IDS(
#       filters_file    => '/home/hinnerk/sandbox/ids/cgi-bin/default_filter.xml',
#       whitelist_file  => '/home/hinnerk/sandbox/ids/cgi-bin/param_whitelist.xml',
#       scan_keys       => 0,
#       disable_filters => [58,59,60],
#   );
#****

=head2 new()

Constructor. Can optionally take a hash of settings. If I<filters_file> is not given,
the shipped filter set will be loaded, I<scan_keys> defaults to 0.

The filter set and whitelist will stay loaded during the lifetime of the object.
You may call C<detect_attacks()> multiple times, the attack array (C<get_attacks()>)
will be emptied at the start of each run of C<detect_attacks()>.

For example, the following is a valid constructor:

 my $ids = new CGI::IDS(
     filters_file    => '/home/hinnerk/ids/default_filter.xml',
     whitelist_file  => '/home/hinnerk/ids/param_whitelist.xml',
     scan_keys       => 0,
     disable_filters => [58,59,60],
 );

The Constructor dies (croaks) if no filter rule could be loaded.

=cut

sub new {
    my ($package, %args) = @_;

    # defaults

lib/CGI/IDS.pm view on Meta::CPAN

#****if* IDS/_convert_entities
# NAME
#   _convert_entities
# DESCRIPTION
#   Converts from hex/dec entities (use HTML::Entities;)
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_entities($value);
#****

sub _convert_entities {
    my ($value) = @_;
    my $converted = '';

    # deal with double encoded payload
    $value = preg_replace(qr/&amp;/, '&', $value);

    if (preg_match(qr/&#x?[\w]+/ms, $value)) {
        $converted  = preg_replace(qr/(&#x?[\w]{2}\d?);?/ms, '$1;', $value);
        $converted  = HTML::Entities::decode_entities($converted);
        $value      .= "\n" . str_replace(';;', ';', $converted);
    }

    # normalize obfuscated protocol handlers
    $value = preg_replace(
        '/(?:j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)|(d\s*a\s*t\s*a\s*)/ms',
        'javascript', $value
    );

    return $value;
}

#****if* IDS/_convert_from_control_chars
# NAME
#   _convert_from_control_chars
# DESCRIPTION
#   Detects nullbytes and controls chars via ord()
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_control_chars($value);
#****

sub _convert_from_control_chars {
    my ($value) = @_;

    # critical ctrl values
    my @search  = (
        chr(0), chr(1), chr(2), chr(3), chr(4), chr(5),
        chr(6), chr(7), chr(8), chr(11), chr(12), chr(14),
        chr(15), chr(16), chr(17), chr(18), chr(19), chr(24),
        chr(25), chr(192), chr(193), chr(238), chr(255)
    );
    $value  = str_replace(\@search, '%00', $value);

    # take care for malicious unicode characters
    $value = urldecode(preg_replace(qr/(?:%E(?:2|3)%8(?:0|1)%(?:A|8|9)\w|%EF%BB%BF|%EF%BF%BD)|(?:&#(?:65|8)\d{3};?)/i, '',
            urlencode($value)));

    $value = urldecode(
        preg_replace(qr/(?:%F0%80%BE)/i, '>', urlencode($value)));
    $value = urldecode(
        preg_replace(qr/(?:%F0%80%BC)/i, '<', urlencode($value)));
    $value = urldecode(
        preg_replace(qr/(?:%F0%80%A2)/i, '"', urlencode($value)));
    $value = urldecode(
        preg_replace(qr/(?:%F0%80%A7)/i, '\'', urlencode($value)));

    $value = preg_replace(qr/(?:%ff1c)/, '<', $value);
    $value = preg_replace(
        qr/(?:&[#x]*(200|820|200|820|zwn?j|lrm|rlm)\w?;?)/i, '', $value
    );

    $value = preg_replace(qr/(?:&#(?:65|8)\d{3};?)|(?:&#(?:56|7)3\d{2};?)|(?:&#x(?:fe|20)\w{2};?)|(?:&#x(?:d[c-f])\w{2};?)/i, '',
            $value);

    $value = str_replace(
        ["\x{ab}", "\x{3008}", "\x{ff1c}", "\x{2039}", "\x{2329}", "\x{27e8}"], '<', $value
    );
    $value = str_replace(
        ["\x{bb}", "\x{3009}", "\x{ff1e}", "\x{203a}", "\x{232a}", "\x{27e9}"], '>', $value
    );

    return $value;
}

#****if* IDS/_convert_from_nested_base64
# NAME
#   _convert_from_nested_base64
# DESCRIPTION
#   Matches and translates base64 strings and fragments used in data URIs (use MIME::Base64;)
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_nested_base64($value);
#****

sub _convert_from_nested_base64 {
    my ($value) = @_;

    my @matches = ();
    preg_match_all(qr/(?:^|[,&?])\s*([a-z0-9]{30,}=*)(?:\W|$)/im, #)/
        $value,
        \@matches,
    );
    # PHP to Perl note: PHP's $matches[1] is Perl's default ($matches[0] is the entire RegEx match)
    foreach my $item (@matches) {
        if ($item && !preg_match(qr/[a-f0-9]{32}/i, $item)) {

            # fill up the string with zero bytes if too short for base64 blocks
            my $item_original = $item;
            if (my $missing_bytes = length($item) % 4) {
                for (1..$missing_bytes) {
                    $item .= "=";

lib/CGI/IDS.pm view on Meta::CPAN

            }

            my $base64_item = MIME::Base64::decode_base64($item);
            $value = str_replace($item_original, $base64_item, $value);
        }
    }

    return $value;
}

#****if* IDS/_convert_from_out_of_range_chars
# NAME
#   _convert_from_out_of_range_chars
# DESCRIPTION
#   Detects nullbytes and controls chars via ord()
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_out_of_range_chars($value);
#****

sub _convert_from_out_of_range_chars {
    my ($value) = @_;

    my @values = str_split($value);
    foreach my $item (@values) {
        if (ord($item) >= 127) {
            $value = str_replace($item, ' ', $value);
        }
    }

    return $value;
}

#****if* IDS/_convert_from_xml
# NAME
#   _convert_from_xml
# DESCRIPTION
#   Strip XML patterns
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_xml($value);
#****

sub _convert_from_xml {
    my ($value) = @_;

    my $converted = strip_tags($value);

    if ($converted && ($converted ne $value)) {
        return $value . "\n" . $converted;
    }
    return $value;
}

#****if* IDS/_convert_from_js_unicode
# NAME
#   _convert_from_js_unicode
# DESCRIPTION
#   Converts JS unicode code points to regular characters
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_js_unicode($value);
#****

sub _convert_from_js_unicode {
    my ($value) = @_;
    my @matches = ();

    # \\u instead of PHP's \\\u
    # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
    preg_match_all(qr/(\\u[0-9a-f]{4})/ims, $value, \@matches);

    if ($matches[0]) {
        foreach my $match ($matches[0]) {
            my $chr = chr(hex(substr($match, 2, 4)));
            $value = str_replace($match, $chr, $value);
        }
        $value .= "\n".'\u0001';
    }
    return $value;
}

#****if* IDS/_convert_from_utf7
# NAME
#   _convert_from_utf7
# DESCRIPTION
#   Converts relevant UTF-7 tags to UTF-8 (use Encode qw/decode/;)
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_utf7($value);
#****

sub _convert_from_utf7 {
    my ($value) = @_;

    if (preg_match(qr/\+A\w+-/m, $value)) {
        $value .= "\n" . decode("UTF-7", $value);
    }

    return $value;
}

#****if* IDS/_convert_from_concatenated
# NAME
#   _convert_from_concatenated
# DESCRIPTION
#   Converts basic concatenations
# INPUT
#   value   the string to convert
# OUTPUT
#   value   converted string
# SYNOPSIS
#   IDS::_convert_from_concatenated($value);
#****

sub _convert_from_concatenated {
    my ($value) = @_;

    # normalize remaining backslashes
    # Perl's \\ should be equivalent to PHP's \\\
    if ($value ne preg_replace(qr/(?:(\w)\\)/, '$1', $value)) {
        $value .= preg_replace(qr/(?:(\w)\\)/, '$1', $value);

( run in 0.826 second using v1.01-cache-2.11-cpan-c966e8aa7e8 )