Apache-SearchEngineLog

 view release on metacpan or  search on metacpan

SearchEngineLog.pm  view on Meta::CPAN

}

sub check_regexen
##########################################################################
# Tries each pattern in $REGEXEN against the server name and returns     #
# the parameter field associated with the first pattern that matches.    #
#                                                                        #
# Arguments:                                                             #
#  0: Name of server as string                                           #
#                                                                        #
# Returns:                                                               #
#  0: Name of parameter, or '' when no rule matches                      #
##########################################################################
{
	my ($host) = @_;

	# keys() yields the patterns in no particular order, so when more than
	# one pattern matches, which rule wins is not defined.
	for my $pattern (keys %{$REGEXEN})
	{
		return $REGEXEN->{$pattern} if ($host =~ m#$pattern#);
	}

	return '';
}

sub cleanup
##########################################################################
# Disconnects the database handle $DBH, if one has been opened.          #
# Registered elsewhere in this file as a server cleanup callback.        #
#                                                                        #
# Arguments:                                                             #
#  None                                                                  #
#                                                                        #
# Returns:                                                               #
#  0: true                                                               #
##########################################################################
{
	# connect_dbi() leaves $DBH false when the connection attempt fails;
	# calling a method on it would then die, so only disconnect a handle
	# that actually exists.
	$DBH->disconnect () if ($DBH);
	return 1;
}

sub connect_dbi
##########################################################################
# Connects to the database using the DBI_* environment settings and      #
# prepares the INSERT statement ($STH) used to store hits.               #
#                                                                        #
# Environment:                                                           #
#  DBI_data_source, DBI_username, DBI_password: connection settings      #
#  DBI_table: table to insert into (optional, defaults to 'hits')        #
#                                                                        #
# Arguments:                                                             #
#  0: Apache::Log object                                                 #
#                                                                        #
# Returns:                                                               #
#  0: true if connected and statement prepared, false otherwise          #
##########################################################################
{
	my $log = shift;

	my $db_source = $ENV{'DBI_data_source'};
	my $db_user   = $ENV{'DBI_username'};
	my $db_passwd = $ENV{'DBI_password'};
	my $db_table  = (defined $ENV{'DBI_table'} ? $ENV{'DBI_table'} : 'hits');

	# A missing setting is only reported, not treated as fatal: the
	# connection attempt below decides whether we succeed.
	$log->error ("Apache::SearchEngineLog: DBI_data_source not defined")
		unless (defined $db_source and length $db_source);
	$log->error ("Apache::SearchEngineLog: DBI_username not defined")
		unless (defined $db_user and length $db_user);

	# An empty password is a legitimate value, so only complain when the
	# variable is not set at all.
	$log->error ("Apache::SearchEngineLog: DBI_password not defined")
		unless (defined $db_passwd);

	$DBH = DBI->connect ($db_source, $db_user, $db_passwd);
	unless ($DBH)
	{
		# A failed connect yields no handle to ask, so the reason is read
		# from the package variable $DBI::errstr.
		my $reason = (defined $DBI::errstr ? $DBI::errstr : 'unknown error');
		$log->error ('Apache::SearchEngineLog: Unable to connect: ' . $reason);
		return 0;
	}
	$log->info ("Apache::SearchEngineLog: Database connection established");

	# NOTE(review): $db_table is interpolated into the SQL text as-is; it
	# comes from the server environment, not from the request.
	$STH = $DBH->prepare ("INSERT INTO $db_table (date, domain, term, uri, vhost) VALUES (NOW(), ?, ?, ?, ?)");
	unless ($STH)
	{
		$log->error ('Apache::SearchEngineLog: ' . $DBH->errstr ());
		return 0;
	}

	# Record the time of the last successful database activity.
	$LASTPING = time;
	return 1;
}

sub db_save
##########################################################################
# Inserts one row per search term through the prepared statement $STH.   #
# A failed insert is reported with warn() and the loop carries on.       #
#                                                                        #
# Arguments:                                                             #
#  0: Name of the remote server                                          #
#  1: The URI requested                                                  #
#  2: Name of the virtual host                                           #
#  @: Terms used in the search engine                                    #
#                                                                        #
# Returns:                                                               #
#  0: true                                                               #
##########################################################################
{
	my ($server, $uri, $hostname, @terms) = @_;

	for my $term (@terms)
	{
		unless ($STH->execute ($server, $term, $uri, $hostname))
		{
			warn $STH->errstr ();
			next;
		}

		# Every successful insert counts as proof the connection is alive.
		$LASTPING = time;
	}

	return 1;
}

sub handler

SearchEngineLog.pm  view on Meta::CPAN

##########################################################################
{
	my $s = Apache->server ();
	my $l = $s->log ();

	$REGEXEN =
	{
		qr#yahoo\.#		=>	'p',
		qr#altavista\.#		=>	'q',
		qr#msn\.#		=>	'q',
		qr#voila\.#		=>	'kw',
		qr#lycos\.#		=>	'query',
		qr#search\.terra\.#	=>	'query',
		qr#google\.(?!yahoo)#	=>	'q',
		qr#alltheweb\.com#	=>	'q',
		qr#netscape\.#		=>	'search',
		qr#northernlight\.#	=>	'qr',
		qr#dmoz\.org#		=>	'search',
		qr#search\.aol\.com#	=>	'query',
		qr#www\.search\.com#	=>	'q',
		qr#askjeeves\.#		=>	'ask',
		qr#hotbot\.#		=>	'mt',
		qr#metacrawler\.#	=>	'general'
	};

	# ping database in this interval at the very most..
	$TIMEOUT = (defined $ENV{'DBI_timeout'} ? $ENV{'DBI_timeout'} : 120);
	connect_dbi ($l);

	Apache->server->register_cleanup (\&cleanup);

	$SERVER = {};

	# load known servers from database.. this is mostly to speed up
	# recognition later on..
	my $sth = $DBH->prepare ("SELECT domain, field FROM config");
	$sth->execute ();
	while (my ($d, $f) = $sth->fetchrow_array ())
	{
		$SERVER->{$d} = $f;
	}
	$sth->finish ();

	$l->debug ("Apache::SearchEngineLog: init done");

	return 1;
}

__END__

=head1 NAME

Apache::SearchEngineLog - Logging of terms used in search engines

=head1 SYNOPSIS

  #in httpd.conf

  PerlSetEnv DBI_data_source  dbi:driver:dsn
  PerlSetEnv DBI_username     username
  PerlSetEnv DBI_password     password
  PerlSetEnv DBI_table        db_table #optional, defaults to "hits"
  PerlSetEnv DBI_timeout      seconds  #optional, defaults to 120

  PerlModule Apache::SearchEngineLog

  <Location /test>
    PerlLogHandler Apache::SearchEngineLog
  </Location>

=head1 DESCRIPTION

Apache::SearchEngineLog logs the terms used at a search engine into an SQL
database, making them easy to analyse and, in turn, helping you optimize your
website.

=head1 TABLE LAYOUT

  The table "hits" should look somewhat like this:

  +--------+-------------+------+-----+---------------------+-------+
  | Field  | Type        | Null | Key | Default             | Extra |
  +--------+-------------+------+-----+---------------------+-------+
  | term   | varchar(50) |      |     |                     |       |
  | vhost  | varchar(20) |      | MUL |                     |       |
  | uri    | varchar(50) |      |     |                     |       |
  | domain | varchar(20) |      |     |                     |       |
  | date   | datetime    |      |     | 0000-00-00 00:00:00 |       |
  +--------+-------------+------+-----+---------------------+-------+

  This is the table "config":

  +--------+-------------+------+-----+---------+-------+
  | Field  | Type        | Null | Key | Default | Extra |
  +--------+-------------+------+-----+---------+-------+
  | domain | varchar(20) |      | PRI |         |       |
  | field  | varchar(10) |      |     |         |       |
  +--------+-------------+------+-----+---------+-------+

=head1 SEE ALSO

mod_perl(3), Apache(3)

=head1 AUTHOR

Florian Forster, octopus@verplant.org

=cut



( run in 0.797 second using v1.01-cache-2.11-cpan-e1769b4cff6 )