Apache-Wyrd
view release on metacpan or search on metacpan
Wyrd/Site/IndexBot.pm view on Meta::CPAN
package Apache::Wyrd::Site::IndexBot;
use strict;
use base qw(Apache::Wyrd::Bot);
use Apache::Wyrd::Services::SAK qw(:file);
use HTTP::Request::Common;
use BerkeleyDB;
our $VERSION = '0.98';
=pod
=head1 NAME
Apache::Wyrd::Site::IndexBot - Sample 'bot for forcing index builds
=head1 SYNOPSIS
Sample Implementation:
package BASENAME::IndexBot;
use strict;
use base qw(Apache::Wyrd::Site::IndexBot BASENAME::Wyrd);
use BASENAME::Index;
sub params {
my ($self) = @_;
my $params = {
basefile => $self->dbl->req->document_root . '/var/indexbot',
server_hostname => $self->dbl->req->server->server_hostname,
document_root => $self->dbl->req->document_root,
fastindex => $self->_flags->fastindex || 0,
purge => $self->_flags->purge || 0,
realclean => $self->_flags->realclean || 0,
};
return $params;
}
sub _work {
my ($self) = @_;
my $index = BASENAME::Index->new;
$index->delete_index if ($self->{'purge'});
$self->index_site($index);
}
Sample Usage:
<BASENAME::IndexBot refresh="20" expire="40" flags="reverse, purge">
<BASENAME::Template name="meta">$:meta</BASENAME::Template>
<H1>Rebuilding the Index</H1>
<H2>$:status</H2>
$:view
</BASENAME::Page>
</BASENAME::IndexBot>
=head1 DESCRIPTION
The IndexBot is an C<Apache::Wyrd::Bot> object which causes a site to be
completely indexed and any remaining deleted documents to be purged from the
index. It does so by reading the names of existing files from the document
root down, purging from the index any files that are no longer found in that
file tree, and generating HTTP requests for all the pages which are found.
As these pages are "Indexable Pages", they update their own index pages when
loaded by the server in answer to the HTTP request.
It should be used in a webmaster-protected section of the site for two
( run in 0.704 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )