Apache-SearchEngineLog
view release on metacpan or search on metacpan
SearchEngineLog.pm view on Meta::CPAN
}
sub check_regexen
##########################################################################
# Determines which search engine rule (if any) applies to a server and   #
# returns the parameter field which contains the useful information.     #
#                                                                        #
# The patterns are the keys of the hash referenced by $REGEXEN; they     #
# are tried in no particular order; the first one that matches wins.     #
#                                                                        #
# Arguments:                                                             #
#   0: Name of server as string                                          #
#                                                                        #
# Returns:                                                               #
#   0: Name of parameter ('' if no rule matches)                         #
##########################################################################
{
    my ($host) = @_;

    for my $pattern (keys %$REGEXEN)
    {
        # skip every rule whose pattern does not occur in the server name
        next unless ($host =~ m#$pattern#);

        return $REGEXEN->{$pattern};
    }

    # no rule applies to this server
    return '';
}
sub cleanup
##########################################################################
# Closes the database connection held in $DBH, if one was established.   #
#                                                                        #
# Arguments:                                                             #
#   None                                                                 #
#                                                                        #
# Returns:                                                               #
#   0: true                                                              #
##########################################################################
{
    # $DBH is undefined when DBI->connect failed in connect_dbi; calling
    # disconnect on it would die, so only a real handle is disconnected.
    $DBH->disconnect () if (defined $DBH);

    return 1;
}
sub connect_dbi
##########################################################################
# Connects to the database and prepares the INSERT statement used to     #
# store hits. On success $DBH, $STH and $LASTPING are set.               #
#                                                                        #
# The settings come from the environment: DBI_data_source,               #
# DBI_username, DBI_password and DBI_table (defaults to 'hits').         #
#                                                                        #
# Arguments:                                                             #
#   0: Apache::Log object                                                #
#                                                                        #
# Returns:                                                               #
#   0: true if successfully connected, false otherwise                   #
##########################################################################
{
    my $log = shift;

    my $db_source = $ENV{'DBI_data_source'};
    my $db_user   = $ENV{'DBI_username'};
    my $db_passwd = $ENV{'DBI_password'};
    my $db_table  = (defined $ENV{'DBI_table'} ? $ENV{'DBI_table'} : 'hits');

    # Report missing settings. Definedness is tested instead of truth so
    # that legitimate values such as an empty password or the user name
    # "0" are not reported as "not defined". An empty data source is
    # still reported, since it cannot name a driver.
    # The connection is attempted regardless: per the DBI documentation,
    # DBI->connect substitutes DBI_DSN, DBI_USER and DBI_PASS from the
    # environment for missing arguments.
    if (!defined $db_source or $db_source eq '')
    {
        $log->error ("Apache::SearchEngineLog: DBI_data_source not defined");
    }
    if (!defined $db_user)
    {
        $log->error ("Apache::SearchEngineLog: DBI_username not defined");
    }
    if (!defined $db_passwd)
    {
        $log->error ("Apache::SearchEngineLog: DBI_password not defined");
    }

    $DBH = DBI->connect ($db_source, $db_user, $db_passwd);
    if (!$DBH)
    {
        # $DBI::errstr is the documented place to read the reason of a
        # failed connect; guard against undef so the message is always
        # well-formed.
        my $reason = (defined $DBI::errstr ? $DBI::errstr : 'unknown error');
        $log->error ('Apache::SearchEngineLog: Unable to connect: ' . $reason);
        return 0;
    }
    $log->info ("Apache::SearchEngineLog: Database connection established");

    # The table name cannot be bound as a placeholder, so it is
    # interpolated; it comes from the server configuration (DBI_table).
    $STH = $DBH->prepare ("INSERT INTO $db_table (date, domain, term, uri, vhost) VALUES (NOW(), ?, ?, ?, ?)");
    if (!$STH)
    {
        $log->error ('Apache::SearchEngineLog: ' . $DBH->errstr ());
        return 0;
    }

    # the connection was just seen working
    $LASTPING = time;
    return 1;
}
sub db_save
##########################################################################
# Writes one row per search term to the database using the prepared      #
# statement in $STH. A failed insert is reported with warn and the       #
# remaining terms are still processed.                                   #
#                                                                        #
# Arguments:                                                             #
#   0: Name of the remote server                                         #
#   1: The URI requested                                                 #
#   2: Name of the virtual host                                          #
#   @: Terms used in the search engine                                   #
#                                                                        #
# Returns:                                                               #
#   0: true                                                              #
##########################################################################
{
    # take the three fixed arguments off the front; what remains in @_
    # are the search terms
    my ($domain, $path, $vhost) = splice (@_, 0, 3);

    for my $word (@_)
    {
        unless ($STH->execute ($domain, $word, $path, $vhost))
        {
            warn $STH->errstr ();
            next;
        }

        # a successful insert refreshes the timestamp kept in $LASTPING
        # (presumably consulted by the ping logic elsewhere in the file)
        $LASTPING = time;
    }

    return 1;
}
sub handler
SearchEngineLog.pm view on Meta::CPAN
##########################################################################
{
my $s = Apache->server ();
my $l = $s->log ();
$REGEXEN =
{
qr#yahoo\.# => 'p',
qr#altavista\.# => 'q',
qr#msn\.# => 'q',
qr#voila\.# => 'kw',
qr#lycos\.# => 'query',
qr#search\.terra\.# => 'query',
qr#google\.(?!yahoo)# => 'q',
qr#alltheweb\.com# => 'q',
qr#netscape\.# => 'search',
qr#northernlight\.# => 'qr',
qr#dmoz\.org# => 'search',
qr#search\.aol\.com# => 'query',
qr#www\.search\.com# => 'q',
qr#askjeeves\.# => 'ask',
qr#hotbot\.# => 'mt',
qr#metacrawler\.# => 'general'
};
# ping database in this interval at the very most..
$TIMEOUT = (defined $ENV{'DBI_timeout'} ? $ENV{'DBI_timeout'} : 120);
connect_dbi ($l);
Apache->server->register_cleanup (\&cleanup);
$SERVER = {};
# load known servers from database.. this is mostly to speed up
# recognition later on..
my $sth = $DBH->prepare ("SELECT domain, field FROM config");
$sth->execute ();
while (my ($d, $f) = $sth->fetchrow_array ())
{
$SERVER->{$d} = $f;
}
$sth->finish ();
$l->debug ("Apache::SearchEngineLog: init done");
return 1;
}
__END__
=head1 NAME
Apache::SearchEngineLog - Logging of terms used in search engines
=head1 SYNOPSIS
#in httpd.conf
PerlSetEnv DBI_data_source dbi:driver:dsn
PerlSetEnv DBI_username username
PerlSetEnv DBI_password password
PerlSetEnv DBI_table db_table #optional, defaults to "hits"
PerlSetEnv DBI_timeout seconds #optional, defaults to 120
PerlModule Apache::SearchEngineLog
<Location /test>
PerlLogHandler Apache::SearchEngineLog
</Location>
=head1 DESCRIPTION
Apache::SearchEngineLog logs the terms used at a search engine into a SQL
Database, making it easy to analyse them and, in turn, to optimize your website.
=head1 TABLE LAYOUT
The table "hits" should look somewhat like this:
+--------+-------------+------+-----+---------------------+-------+
| Field | Type | Null | Key | Default | Extra |
+--------+-------------+------+-----+---------------------+-------+
| term | varchar(50) | | | | |
| vhost | varchar(20) | | MUL | | |
| uri | varchar(50) | | | | |
| domain | varchar(20) | | | | |
| date | datetime | | | 0000-00-00 00:00:00 | |
+--------+-------------+------+-----+---------------------+-------+
This is the table "config":
+--------+-------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+--------+-------------+------+-----+---------+-------+
| domain | varchar(20) | | PRI | | |
| field | varchar(10) | | | | |
+--------+-------------+------+-----+---------+-------+
=head1 SEE ALSO
mod_perl(3), Apache(3)
=head1 AUTHOR
Florian Forster, octopus@verplant.org
=cut
( run in 0.797 second using v1.01-cache-2.11-cpan-e1769b4cff6 )