Apache-SearchEngineLog

 view release on metacpan or  search on metacpan

SearchEngineLog.pm  view on Meta::CPAN

		}
		else
		{
			warn $STH->errstr ();
		}
	}

	return 1;
}

sub handler
##########################################################################
# The handler called by Apache. It analyses the request and the referer  #
# and eventually calls other subroutines to assist in this task. This    #
# is the heart of this program..                                         #
#                                                                        #
#                                                                        #
# Arguments:                                                             #
#  0: Apache::Request object                                             #
#                                                                        #
# Returns:                                                               #
#  0: true                                                               #
##########################################################################
{
	my $r = shift or return undef;
	my %h = $r->headers_in ();
	my $l = $r->log ();

	$l->debug ("Apache::SearchEngineLog: handling request..");

	# first step: check for a (valid and usfull) referer
	unless (defined $h{'Referer'})
	{
		$l->debug ("Apache::SearchEngineLog: no referer defined..");
		return 1;
	}

	my $referer = $h{'Referer'};

	my $status = $r->status ();
	if ($status >= 400)
	{
		$l->debug ("Apache::SearchEngineLog: Not handling status code #$status..");
		return 1;
	}

	my ($server, $params);
	# referers are always http.. prove me wrong if i should be..
	# https shouldn't work either I belive..
	if ($referer =~ m#^http://([^/]+)/[^\?]+\?(.+)$#)
	{
		$server = $1;
		$params = $2;
	}
	else
	{
		$l->debug ("Apache::SearchEngineLog: No parameters present..");
		return 1;
	}

	# referer looks fairly usefull.. let's check this..
	my %params; # i know some people don't like this.. I do ;)
	foreach (split (m#\&#, $params))
	{
		my ($key, $value) = split (m#=#, $_, 2);
		$value =~ y#+# #;
		$value =~ s#%([a-fA-F0-9]{2})#pack ("C", hex ($1))#eg;

		$params{$key} = $value;
	}

	my $field;
	if (!defined $SERVER->{$server})
	{
		$l->debug ("Apache::SearchEngineLog: Unknown server: $server! Checking..");

		# servers without an apropriate entry in $REGEXEN should
		# leave us here..
		$field = check_regexen ($server) or return 1;

		if (defined $params{$field})
		{
			$SERVER->{$server} = $field;

			check_alive_dbi ($l);

			my $sth = $DBH->prepare ("INSERT INTO config (domain, field) VALUES (?, ?)");
			$sth->execute ($server, $field);
			$sth->finish ();

			$l->info ("Apache::SearchEngineLog: Added new domain: $server");
		}
	}
	else
	{
		$l->debug ("Apache::SearchEngineLog: Known server: $server");
		$field = $SERVER->{$server};
	}

	unless (defined $params{$field})
	{
		$l->info ("Apache::SearchEngineLog: Known server missing field: $server");
		return 1;
	}

	# ignore goggle's cache-parameters and related option
	if ($params{$field} =~ m#^(?:cache|related):\S+\s#)
	{
		# $' == everything right of match, FYI
		$params{$field} = $';
	}

	my $uri = $r->uri ();
	my $s = $r->server ();
	my $virtual = $s->server_hostname ();

	if ($status == 301 or $status == 302 or $status == 303 or $status == 307)
	{
		my $location;
		$location = $r->header_out ('Location') or $location = '';
		



( run in 1.090 second using v1.01-cache-2.11-cpan-39bf76dae61 )