Apache-SearchEngineLog
view release on metacpan or search on metacpan
SearchEngineLog.pm view on Meta::CPAN
}
else
{
warn $STH->errstr ();
}
}
return 1;
}
sub handler
##########################################################################
# The handler called by Apache. It analyses the request and the referer #
# and eventually calls other subroutines to assist in this task. This #
# is the heart of this program.. #
# #
# #
# Arguments: #
# 0: Apache::Request object #
# #
# Returns: #
# 0: true #
##########################################################################
{
my $r = shift or return undef;
my %h = $r->headers_in ();
my $l = $r->log ();
$l->debug ("Apache::SearchEngineLog: handling request..");
# first step: check for a (valid and usfull) referer
unless (defined $h{'Referer'})
{
$l->debug ("Apache::SearchEngineLog: no referer defined..");
return 1;
}
my $referer = $h{'Referer'};
my $status = $r->status ();
if ($status >= 400)
{
$l->debug ("Apache::SearchEngineLog: Not handling status code #$status..");
return 1;
}
my ($server, $params);
# referers are always http.. prove me wrong if i should be..
# https shouldn't work either I belive..
if ($referer =~ m#^http://([^/]+)/[^\?]+\?(.+)$#)
{
$server = $1;
$params = $2;
}
else
{
$l->debug ("Apache::SearchEngineLog: No parameters present..");
return 1;
}
# referer looks fairly usefull.. let's check this..
my %params; # i know some people don't like this.. I do ;)
foreach (split (m#\&#, $params))
{
my ($key, $value) = split (m#=#, $_, 2);
$value =~ y#+# #;
$value =~ s#%([a-fA-F0-9]{2})#pack ("C", hex ($1))#eg;
$params{$key} = $value;
}
my $field;
if (!defined $SERVER->{$server})
{
$l->debug ("Apache::SearchEngineLog: Unknown server: $server! Checking..");
# servers without an apropriate entry in $REGEXEN should
# leave us here..
$field = check_regexen ($server) or return 1;
if (defined $params{$field})
{
$SERVER->{$server} = $field;
check_alive_dbi ($l);
my $sth = $DBH->prepare ("INSERT INTO config (domain, field) VALUES (?, ?)");
$sth->execute ($server, $field);
$sth->finish ();
$l->info ("Apache::SearchEngineLog: Added new domain: $server");
}
}
else
{
$l->debug ("Apache::SearchEngineLog: Known server: $server");
$field = $SERVER->{$server};
}
unless (defined $params{$field})
{
$l->info ("Apache::SearchEngineLog: Known server missing field: $server");
return 1;
}
# ignore goggle's cache-parameters and related option
if ($params{$field} =~ m#^(?:cache|related):\S+\s#)
{
# $' == everything right of match, FYI
$params{$field} = $';
}
my $uri = $r->uri ();
my $s = $r->server ();
my $virtual = $s->server_hostname ();
if ($status == 301 or $status == 302 or $status == 303 or $status == 307)
{
my $location;
$location = $r->header_out ('Location') or $location = '';
( run in 1.090 second using v1.01-cache-2.11-cpan-39bf76dae61 )