Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Site/IndexBot.pm  view on Meta::CPAN

		}
	}
	print "<b><p>$counter files indexed</p></b>";

	#Save the date to the lastindex file.
	spit_file($lastfile, $newest);
	return;
}

sub purge_missing {
	my ($self, $instance) = @_;
	my @no_skip = ();
	my $root = $self->{'document_root'};
	print "<P>First checking for deleted documents and corrupt data";
	my $index = $instance->write_db;
	my %ismap = ();
	foreach my $value (keys %{$instance->maps}) {
		$value = $instance->attributes->{$value};
		$ismap{$value} = 1;
	}
	my %exists = ();
	my %reverse = ();
	my %force_purge = ();
	my $cursor = $index->db_cursor;
	$cursor->c_get(my $id, my $document, DB_FIRST);
	do {
		my ($first, $second, $identifier) = unpack('aaa*', $id);
		if ($second ne '%') {
			#if the metachar is not there, this is a primary filename map.
			$exists{$id} = $document || 'error: unnamed entry';
		} elsif ($first eq "\0") {
			#if the metachar is 0, this is a reversemap
			$reverse{$document} = $identifier;
		}
	} until ($cursor->c_get($id, $document, DB_NEXT));
	undef $cursor;
	foreach my $id (keys %exists) {
		my $document = $exists{$id};
		if ($reverse{$id} ne $exists{$id}) {
			print "Entry $id for $exists{$id} seems to be a duplicate entry.  Deleting it prior to purge...";
			my $result = $index->db_del($id);
			$force_purge{$id} = 1;
			if ($result) {
				print "Failed to delete dangling entry $id.  Manual repair may be necessary...";
			}
		} elsif (-f ($root . $document)) {
			#document exists as a file
			print"keeping $root$document";
		} else {
			my $entry = $instance->get_entry($id);
			my $file = $entry->{'file'};
			if (-f ($root . $file)) {
				push @no_skip, $entry;
				if ($document =~ /^\//) {
					print "purging $document, since it's been deleted, but <span class=\"error\">you need to delete the proxy page $file</span>: ". $instance->purge_entry($id);
				} else {
					print "keeping $document, since it's off-site but the proxy ($file) exists";
				}
			} elsif ($document eq '<DELETED>') {
				if ($self->{'realclean'}) {
					print"purging dirty reference to an updated document: ". $instance->purge_entry($id);
				} else {
					print"skipping dirty reference to a previously deleted document";
				}
			} elsif ($document =~ /^\//) {
				print "purging proxy reference to deleted document $root$document: ". $instance->purge_entry($id);
			} else {
				print "purging reference to a dropped proxy to $document ($file): ". $instance->purge_entry($id);
			}
		}
	}
	#re-invoke an instance of cursor since db may have changed (just in case)
	$cursor = $index->db_cursor;
	$cursor->c_get(my $id, my $document, DB_FIRST);
	do {
		my ($attribute, $separator, $current_id) = unpack('aaa*', $id);
		if ($separator ne '%') {
			#do nothing with primary data
		} elsif ($ismap{$attribute}) {
			my $do_update = 0;
			my $value = '';
			my @ids = ();
			my(%entries) = unpack("n*", $document);
			foreach my $item (keys %entries) {
				if (not($exists{$item}) or $force_purge{$item}) {
					$do_update = 1;
					push @ids, $item;
					next;
				}
				$value .= pack "n", $item;
				$value .= pack "n", $entries{$item};
			}
			if ($do_update) {
				my $ids = join ', ', @ids;
				my $error = $index->db_put($id, $value);
				my $ord = unpack "C", $id;
				print "WARNING: purged corrupt map data for nonexistent ids $ids &#151; " . ($instance->attribute_list->[$ord] || "Unknown attribute [$ord]") . " (id# $current_id): " . ($error ? 'failed!' : 'succeeded.');
			}
		} elsif (($attribute eq "\x00") and not(-f ($root . $current_id))) {
			if ($current_id !~ m#^https?://#) {
				my $error = $index->db_del($id);
				my $ord = unpack "C", $id;
				print "WARNING: purged reverse filemap for nonexistent file $current_id &#151; " . ($instance->attribute_list->[$ord] || "Unknown attribute [$ord]") . " (id# $current_id): ". ($error ? 'failed!' : 'succeeded.');
			};
		} elsif ($attribute eq "\xff") {
			#do nothing to global metadata
		} elsif (not($current_id)) {
			print "Strange null entry under attribute " . $instance->attribute_list->[unpack "C", $id] . "... Your guess is as good ad mine...";
		} elsif ($force_purge{$current_id} or (not(($attribute eq "\x00")) and not($exists{$current_id}))) {
			my $error = $index->db_del($id);
			my $ord = unpack "C", $id;
			print "WARNING: purged corrupt data for nonexistent id $current_id &#151; " . ($instance->attribute_list->[$ord] || "Unknown attribute [$ord]") . " (id# $current_id): ". ($error ? 'failed!' : 'succeeded.');
		}
	} until ($cursor->c_get($id, $document, DB_NEXT));
	$cursor->c_close;
	$instance->close_db;
	print "</p>";
	return @no_skip;
}




( run in 2.065 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )