Apache-Logmonster
view release on metacpan or search on metacpan
lib/Apache/Logmonster.pm view on Meta::CPAN
};
my $lines = 0;
$self->_progress_begin("\t parsing entries from $file") if $debug;
while ( $gz->gzreadline($_) > 0 ) {
chomp $_;
$lines++;
$self->_progress_continue() if ( $debug && $lines =~ /00$/ );
my %data;
@data{@captured_fields} = /$re/; # no need for /o, a compiled regexp
# make sure the log format has the vhost tag appended
my $vhost = $data{'vhost'};
if ( !$vhost || $vhost eq '-' ) {
#print "Invalid log entries! Read the FAQ!\n" if $debug;
print $_ . "\n" if $debug > 2;
$vhost = $conf->{default_vhost};
$bad++;
};
$vhost = lc($vhost);
$self->spam_check(\%data, \%count);
if ( ! $fhs{$vhost} ) {
$self->open_vhost_handle( $vhost );
};
if ( $fhs{$vhost} ) {
my $fh = $fhs{$vhost};
print $fh "$_\n";
$count{$vhost}++;
next;
};
print "\nthe main domain for $vhost is missing!\n" if $debug > 1;
$orphans{$vhost} = $vhost;
};
$gz->gzclose();
$self->_progress_end() if $debug;
};
$self->report_matches( \%count, \%orphans);
$self->report_spam_hits( \%count );
$self->report_bad_hits( $bad );
return \%fhs;
};
# spam_check( \%data, \%count )
#
# Scores one parsed log entry for referer-spam likelihood. When the score
# exceeds 2 the entry is tallied in %$count and the rest of the caller's
# loop iteration is skipped.
#
#   $data  - hashref of fields captured from the log line. The keys read
#            here are: status, ref, ua and bytes.
#   $count - hashref of running totals, updated in place. The keys written
#            here are: spam, bytes, spam_agents and spam_referers.
#
# Returns nothing. It is a no-op unless $self->{conf}{spam_check} is true.
#
# NOTE: a spam hit leaves this sub with `next`, which Perl applies to the
# innermost loop that is active at the call site. This sub must therefore
# be called from inside the per-line loop; the effect is that the caller
# never reaches the code that would otherwise process the line.
sub spam_check {
    my ($self, $data, $count) = @_;

    my $conf = $self->{conf};
    return if ! $conf->{spam_check};

    my $spam_score = 0;

    # HTTP status signals. A 404 alone is not a sign of naughtiness; 412 and
    # 403 mean the httpd config is already slapping them.
    if ( $data->{status} ) {
        if ( $data->{status} == 404 ) {
            $spam_score++;
        }
        elsif ( $data->{status} == 412 ) {
            $spam_score++;
        }
        elsif ( $data->{status} == 403 ) {
            $spam_score += 2;
        }
    }

    # nearly all of my referer spam has a # ending the referer string
    if ( $data->{ref} && $data->{ref} =~ /#$/ ) {
        $spam_score += 2;
    }

    # should check for invalid/suspect useragent strings here
    if ( $data->{ua} ) {
        $spam_score +=
              $data->{ua} =~ /crazy/ixms ? 1
            : $data->{ua} =~ /email/i    ? 3
#           : $data->{ua} =~ /windows/   ? 1
            :                              0;
    }

    # A score above 2 marks the entry as spam. An "email" user agent gets
    # there on its own; every other signal needs to be combined with another.
    if ( $spam_score > 2 ) {
        $count->{spam}++;

        # Only add a byte count that is entirely digits; anything else
        # (such as "-" or a malformed field) is left out of the total.
        my $bytes = $data->{bytes};
        if ( defined $bytes && $bytes =~ /\A[0-9]+\z/ ) {
            $count->{bytes} += $bytes;
        }

        # A missing user agent or referer is tallied under the empty string.
        # That is the key Perl would use for undef anyway; doing it
        # explicitly avoids an "uninitialized value" warning.
        my $agent   = defined $data->{ua}  ? $data->{ua}  : '';
        my $referer = defined $data->{ref} ? $data->{ref} : '';
        $count->{spam_agents}{$agent}++;
        $count->{spam_referers}{$referer}++;

#       printf "%3s - %30s - %30s \n", $data->{status},
#           $data->{ref}, $data->{ua};

        # Leaving the sub through `next` is intentional (see NOTE above), so
        # silence the "Exiting subroutine via next" warning just for it.
        no warnings 'exiting';
        next;    # skips processing the line
    }

# TODO: also keep track of ham referers, and print in referer spam reports, so
# that I can see which UA are entirely spammers and block them in my Apache
# config.
#    else {
#        $count->{ham_referers}{$data->{ref}}++;
#    }

    return;
};
sub open_vhost_handle {
my $self = shift;
my $vhost = shift;
my $fh = new FileHandle; # create a file handle for each ServerName
$fhs{$vhost} = $fh; # store in a hash keyed off the domain name
my $debug = $self->{debug};
my $dir = $self->{conf}{tmpdir}; # normally /var/log/(apache|http)/tmp
( run in 1.294 second using v1.01-cache-2.11-cpan-98e64b0badf )