Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Site/Index.pm  view on Meta::CPAN


=item (arrayref of hashrefs) C<get_children> (scalar, hashref)

Given a pagename (See Page in this subclass), the method returns the
entries of all children of that page in the navigation hierarchy.  The
arrayref is in the order determined by the Index object (see
Apache::Wyrd::Services::Index).  The data returned for each child can
optionally be limited by the parameters specified in the hashref, which is
handed directly to the get_entry method (see the get_entry method of the
Apache::Wyrd::Services::Index class).

=cut

#Return an arrayref of entry hashrefs, one for each child recorded for
#$parent under the 'children' key of the index.  $params is handed through
#untouched to get_entry.  Each returned entry additionally carries a 'rank'
#value taken from the children record.  A page with no children record
#yields an empty arrayref.
sub get_children {
	my ($self, $parent, $params) = @_;
	my $index = $self->read_db;
	my $failed = $index->db_get($self->make_key('children', $parent), my $packed_children);
	#warn $self->translate_packed($packed_children);
	#no record (or an empty one) means there is nothing to unpack
	return [] if ($failed or not(defined($packed_children)) or ($packed_children eq ''));
	#the record is a flat list of unsigned 16-bit big-endian integers which
	#is read here as (child id => rank) pairs
	my %children = unpack("n*", $packed_children);
	my @children = ();
	foreach my $key (keys %children) {
		my $child = $self->get_entry($key, $params);#turn id into hashref of contents
		#warn "child - $child->{id}";
		#use the id that was looked up rather than $child->{id}: the entry
		#need not contain an 'id' when $params limits what get_entry returns
		$child->{'rank'} = $children{$key};
		push @children, $child;
	}
	return \@children;
}

=pod

=item (scalar) C<index_site> (Apache req handle, scalar)

This method is an obsolete way of running through the files of a site and
committing them to index.  Please use the much newer and fault-tolerant
Apache::Wyrd::Site::IndexBot.

That being said, the method takes the current Apache request object handle
and a scalar flag.  When the flag is true, only files modified since the
previous run of this method are re-indexed; otherwise a complete index is
performed.  The method returns the text output of the update process.

=cut

#Obsolete site crawler: finds every *.html file under the document root,
#requests each one over HTTP from this server, and returns an HTML fragment
#reporting the result for each file.
#
#  $req       - Apache request object (the method dies if it is anything else)
#  $fastindex - when true, files older than the timestamp stored in
#               <document root>/var/lastindex.db are not requested
#
#The newest modification time seen is written back to var/lastindex.db on
#every run, whatever the value of $fastindex.
sub index_site {
	my ($self, $req, $fastindex) = @_;
	die ("index site requires an Apache request object, not a: " . ref($req)) unless (ref($req) eq 'Apache');
	my $hostname = $req->server->server_hostname;
	my $root = $req->document_root;
	my $out = $self->purge_missing($req);
	my $ua = $self->ua;
	$ua->timeout(60);
	local $| = 1;
	#List-form pipe open: no shell is involved, so the *.html pattern reaches
	#find intact (instead of being glob-expanded in the current directory)
	#and a document root containing spaces or shell metacharacters is safe.
	open (my $files, '-|', '/usr/bin/find', $root, '-name', '*.html')
		or die ("index site could not run /usr/bin/find: $!");
	my $counter = 0;
	my $lastindex = ${slurp_file($root. "/var/lastindex.db")};
	my $newest = $lastindex;
	while (my $file = <$files>) {
		chomp $file;
		my @stats = stat($file);
		#warn "Document status/lastindex/current newest:" . join('/', $stats[9], $lastindex, $newest);
		$newest = $stats[9] if ($stats[9] > $newest);
		#note: counts every file found, including those skipped below
		$counter++;
		next if ($fastindex and ($stats[9] < $lastindex));
		#strip the document root as a literal, leading prefix only
		$file =~ s/^\Q$root\E//;
		next if $self->skip_file($file);
		my $url = "http://$hostname$file";
		my $response = $ua->request(GET $url);
		my $status = $response->status_line;
		if ($status =~ /200|OK/) {
			$out .= "<br>$file: OK";
		} else {
			$out .= ("<br>Problem with $file: $status");
		}
	}
	#reap the find process; its exit status is not treated as fatal
	close ($files);
	$out = "<b><p>$counter files indexed:</p></b>" . $out;
	spit_file($root . '/var/lastindex.db', $newest);
	return $out;
}

=pod

=item (hashref) C<lookup> (scalar)

or

=item (scalar) C<lookup> (scalar, scalar)

Look up and return data from the index.  In both forms, the first argument
is a scalar representation of the page.  This can be either the page name
(the path after the document root) or the page's internal index ID (an
integer).

If the specific attribute is not given, the method returns a hashref of the
full data for the page.  If the attribute is given, only the value of that
attribute is returned.

=cut

#General-purpose accessor for index data.  With only a page name/id it
#returns the whole entry as provided by get_entry; with an attribute name as
#well it returns just that attribute's stored value.  When get_id flags the
#page as new, the result is an empty hashref (no attribute requested) or
#undef (attribute requested).  A failed attribute fetch also gives undef.
sub lookup {
	my ($self, $name, $attribute) = @_;
	my $index = $self->read_db;
	#warn("looking up $attribute for $name");
	my ($id, $new) = $self->get_id($name);
	#warn("found id '$id' new: $new");
	if ($new) {
		return $attribute ? undef : {};
	}
	#no attribute requested: hand back the complete entry
	return $self->get_entry($id) unless ($attribute);
	#db_get reports a true status on failure and fills $value on success
	my $status = $index->db_get($self->make_key($attribute, $id), my $value);
	#warn "failed" if $status;
	return $status ? undef : $value;
}

=pod

=item (scalar) C<purge_missing> (Apache request handle)



( run in 2.075 seconds using v1.01-cache-2.11-cpan-99c4e6809bf )