Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Services/MySQLIndex.pm  view on Meta::CPAN

# 	my @tables = map {"$_ write"} @{$self->tables};
# 	my $clause = join ', ', @tables;
# 	my $sh = $db->prepare('lock tables ' . $clause);
# 	$sh->execute;
# 	if ($sh->err) {
# 		$self->set_error($sh->errstr);
# 	} else {
		$self->{'db'} = $db;
		$self->newstatus('W');
# 	}
	return;
}

# recover_db - stub kept for interface compatibility; it does no work of its
# own and simply hands the call to obsolete().
# The "&obsolete;" form (ampersand, no parentheses) invokes obsolete() with this
# sub's current @_, so the invocant and any arguments are passed along as-is.
# NOTE(review): obsolete() is defined outside this excerpt; presumably it
# reports that the method is no longer supported -- confirm.
sub recover_db {
	&obsolete;
}

# close_db - forget the database handle and mark this index as closed.
#
# When the 'status' slot is already 'C' nothing is touched.  Otherwise the 'db'
# slot is set to undef and 'status' becomes 'C'.  Returns nothing in every case.
#
# The former "unlock tables" step for an index in 'W' status remains disabled;
# it is preserved below for reference only:
# 	if ($self->{'status'} eq 'W') {
# 		my $db = $self->{'db'};
# 		my $sh = $db->prepare('unlock tables');
# 		$sh->execute;
# 		if ($sh->err) {
# 			$self->set_error($sh->errstr);
# 		}
# 	}
sub close_db {
	my $self = shift;
	unless ($self->{'status'} eq 'C') {
		$self->{'db'} = undef;
		$self->{'status'} = 'C';
	}
	return;
}

sub update_entry {
	my ($self, $entry) = @_;

	#Make sure the object being sent to this update is a valid one.
	$self->set_error = "Index entries must be objects " unless (ref($entry));
	#localize debug value so that the entry can modify it.
	my $debug = $self->debug;
	$self->{'runtime_flags'} = {};
	if (UNIVERSAL::can($entry, 'index_runtime_flags')) {
		map {$self->{'runtime_flags'}->{$_} => 1} token_parse($entry->index_runtime_flags);
		$debug = 1 if ($self->{'runtime_flags'}->{'debug'});
		$debug = 0 if ($self->{'runtime_flags'}->{'nodebug'});
	}
	foreach my $function (qw/no_index index_name index_timestamp index_digest index_data/) {
		$self->set_error("Index entries must implement the method $function\(\)") unless ($entry->can($function));
	}

	#check that the name is OK
	my $name = $entry->index_name;
	$self->set_error("Index entries must return non-null for method index_name()") unless ($name);
	$self->check_error;
	$self->set_error("<DELETED> is an invalid name for index entries ") if ($name eq '<DELETED>');
	$self->set_error($name . " is an invalid name for index entries ") if ($name =~ /^.%/s);
	$self->check_error;

	#everything OK?  Start the DB handle and check that it is supposed to be indexed.
	$self->read_db;
	my ($id, $not_found_flag) = $self->get_id($name);

	#If this entry has been set not to index, make sure it is not in the index and return.
	if ($entry->no_index) {
		if ($not_found_flag) {
			#if key is not found
			return "yes to no_index and not indexed.";
		}
		$self->write_db;
		my $result = $self->purge_entry($id);
		$self->close_db;
		return $result;
	}

	$debug && warn $name . " is new" if ($not_found_flag);
	my $current_timestamp = undef; #lexically scoped to reduce multiple timestamp calculations
	my $current_digest = undef; #lexically scoped to reduce multiple digest calculations
	unless ($entry->force_update) {
		my $sh = $self->db->prepare("select id, timestamp, digest from _wyrd_index where name=?");
		$sh->execute($name);
		my ($id, $timestamp, $digest) = @{$sh->fetchrow_arrayref || []};
		$current_timestamp = $entry->index_timestamp;
		$debug && warn "Comparing timestamps: $timestamp <-> " . $current_timestamp . " for " . $name;
		if ($timestamp eq $current_timestamp) {
			$debug && warn "No update needed.  Timestamp is $timestamp." ;
			return "No update needed.  Timestamp is $timestamp." ;
		}
		if ($timestamp) {
			#Timestamp was found and is different, so calculate an sha1 fingerprint and see if there really
			#has been a change.
			$current_digest = $entry->index_digest;
			$debug && warn "Comparing digests: $digest <-> " . $current_digest . " for " . $name;
			if ($digest eq $current_digest) {
				$self->write_db;
				$self->update_key($id, 'timestamp', $current_timestamp);
				$self->close_db;
				$debug && warn "Updated timestamp only, since digest was identical.";
				return "Updated timestamp only, since digest was identical.";
			}
		}
	}

	#We are sure the object's entry is out-of-date, so it's time to update.
	$self->write_db;

	#TODO: Add a new way of handling transactions

	my %entry = ();
	$self->purge_entry($id) unless ($not_found_flag); #necessary to clear out words which will not match
	$entry{'name'} = $name;
	$entry{'timestamp'} = $current_timestamp;
	$entry{'digest'} = $current_digest || $entry->index_digest;
	$entry{'title'} = $entry->index_title if ($entry->can('index_title'));
	$entry{'keywords'} = $entry->index_keywords if ($entry->can('index_keywords'));
	$entry{'description'} = $entry->index_description if ($entry->can('index_description'));

	my $field_clause = '(' . join(', ', keys %entry) . ')';
	my $value_clause = 	'('
						. join(
								', ',
									(
										map {$self->db->quote($_)} values %entry
									)
						)
						. ')';

	my $sh = $self->db->prepare("insert into _wyrd_index $field_clause values $value_clause");
	$sh->execute;
	$id = $sh->{'mysql_insertid'};

	if ($sh->err) {
		$self->set_error($sh->errstr);
	} else {

		$self->process_html($id, $entry->index_data);

		if ($self->extended) {
			my @attributes = @{$self->attribute_list};
			splice(@attributes, 0, 8);
			foreach my $attribute (@attributes) {
				my $value = undef;
				if ($entry->can("index_$attribute")) {
					eval('$value = $entry->index_' . $attribute);
					$self->set_error($@) if ($@);
					$self->check_error;
				} elsif (exists($entry->{$attribute})) {
					$value = $entry->{$attribute};
				}
				if ($entry->can("handle_$attribute")) {
					eval('$entry->handle_' . $attribute . '($id, $value)');
					$self->set_error($@) if ($@);
					$self->check_error;
				} else {
					if ($self->maps->{$attribute}) {
						if (defined($value)) {
							$self->index_map($attribute, $id, [token_parse(lc($value))]);
						}
					}
						if (defined($value)) {
							$self->update_key($id, $attribute, $value);
						}
				}
			}
		}
	}


	#TODO: Deal with failed update transactions here.

Wyrd/Services/MySQLIndex.pm  view on Meta::CPAN

	if (ref($id) eq 'ARRAY') {
		unless (scalar(@$id)) {
			$debug && warn "get_entry() was passed an empty array, aborting.";
			return;
		}
		$in_clause = join ', ', @$id;
		$in_clause = qq{in ($in_clause)};
	} else {
		unless (defined($id) and not(ref($id))) {
			if (ref($id)) {
				$debug && warn "get_entry was passed an invalid reference, aborting.";
			} else {
				$debug && warn "get_entry was passed an undefined value, aborting.";
			}
			return;
		}
		$in_clause = qq{='$id'};
	}
	$params = {} unless (ref($params) eq 'HASH');

	my @attributes = @{$self->attribute_list};
	my %skip = map {$_ => 1} (@{$params->{'skip'} || []}, @{$self->map_list}, 'name', 'id');
	@attributes = grep {!$skip{$_}} @attributes;
	if ($params->{'limit'}) {
		my %limit = map {$_ => 1} @{$params->{'limit'}};
		@attributes = grep {$limit{$_}} @attributes;
	}
	if ($params->{'require'}) {
		my %unique = ();
		@attributes = grep {$unique{$_}++ == 0} (@attributes, @{$params->{'require'}});
	}

	my $attributes = join (", ", @attributes);
	$self->read_db;
	my $sh = $self->db->prepare("select id, name, $attributes from _wyrd_index where id $in_clause");
	$sh->execute;
	if ($sh->err) {
		$self->set_error($sh->errstr);
	}
	my @entries = ();
	while(my $data_ref = $sh->fetchrow_hashref) {
		#copy off the data to a hash
		my %entry = %$data_ref;
		push @entries, \%entry;
	}
	$self->close_db;
	if (wantarray) {
		return @entries;
	} else {
		return $entries[0];
	}
}

# get_id - look up the id stored in _wyrd_index for an entry name.
#
# Parameters:
#   $name - entry name, bound to the query as a placeholder value.
#
# Returns:
#   scalar/void context - the id from the first row (undef when no row came back).
#   list context        - ($id, $not_found), where $not_found is 1 when the id is
#                         false (no row, or a false id value) and undef otherwise.
#
# A statement error is recorded through set_error(); the fetch is still attempted.
sub get_id {
	my $self = shift;
	my $name = shift;
	my $query = $self->db->prepare('select id from _wyrd_index where name=?');
	$query->execute($name);
	$self->set_error($query->errstr) if ($query->err);
	my $row = $query->fetchrow_arrayref;
	my $id = $row->[0];
	return $id unless (wantarray);
	my $not_found = $id ? undef : 1;
	return ($id, $not_found);
}

# get_value - fetch a single column of one _wyrd_index row.
#
# Parameters:
#   $id        - row id, bound to the query as a placeholder value.
#   $attribute - column name; it is interpolated directly into the SQL text, so
#                it must come from a trusted source.
#
# Returns the first column of the first row fetched, or undef when nothing was
# fetched.  A statement error is recorded through set_error().
sub get_value {
	my $self = shift;
	my ($id, $attribute) = @_;
	my $query = $self->db->prepare("select $attribute from _wyrd_index where id=?");
	$query->execute($id);
	$self->set_error($query->errstr) if ($query->err);
	my $row = $query->fetchrow_arrayref;
	my $value = $row->[0];
	return $value;
}

# update_key - set one column of one _wyrd_index row.
#
# Parameters:
#   $id        - row id, bound as a placeholder value.
#   $attribute - column name; it is interpolated directly into the SQL text, so
#                it must come from a trusted source.
#   $value     - new column value, bound as a placeholder value.
#
# Returns nothing.  A statement error is recorded through set_error().
sub update_key {
	my $self = shift;
	my ($id, $attribute, $value) = @_;
	my $statement = $self->db->prepare("update _wyrd_index set $attribute=? where id=?");
	$statement->execute($value, $id);
	$self->set_error($statement->errstr) if ($statement->err);
	return;
}

# delete_key - stub kept for interface compatibility; it does no work of its
# own and simply hands the call to obsolete().
# The "&obsolete;" form (ampersand, no parentheses) invokes obsolete() with this
# sub's current @_, so the invocant and any arguments are passed along as-is.
# NOTE(review): obsolete() is defined outside this excerpt; presumably it
# reports that the method is no longer supported -- confirm.
sub delete_key {
	&obsolete;
}

# process_html - store the cleaned text of an entry and index its words.
#
# Parameters:
#   $id   - id of the _wyrd_index row being updated.
#   $data - raw text of the entry.
#
# Skipped entirely when the 'no_data' runtime flag is set.  Otherwise the text
# is passed through clean_html(), saved in the 'data' column, handed to
# index_words(), and the count that comes back is saved in the 'wordcount'
# column.  Returns nothing.
sub process_html {
	my ($self, $id, $data) = @_;

	unless ($self->{'runtime_flags'}->{'no_data'}) {
		#Strip the text down before it is stored or indexed
		my $cleaned = $self->clean_html($data);
		$self->update_key($id, 'data', $cleaned);

		my $wordcount = $self->index_words($id, $cleaned);
		$self->update_key($id, 'wordcount', $wordcount);
	}

	return;
}

# extract_html - stub kept for interface compatibility; it does no work of its
# own and simply hands the call to obsolete().
# The "&obsolete;" form (ampersand, no parentheses) invokes obsolete() with this
# sub's current @_, so the invocant and any arguments are passed along as-is.
# NOTE(review): obsolete() is defined outside this excerpt; presumably it
# reports that the method is no longer supported -- confirm.
sub extract_html {
	&obsolete;
}

# index_words - split text into whitespace-separated words, hand them to
# index_map() under the 'data' attribute, and report how many there were.
#
# Parameters:
#   $id   - id of the entry the words belong to.
#   $data - text to split; undef is treated as empty text.
#
# Returns the number of words found.
sub index_words {
	my ($self, $id, $data) = @_;
	$data = '' unless (defined($data));
	# A single-space pattern makes split skip leading whitespace.  With /\s+/,
	# text that starts with whitespace yields an empty first "word", which
	# would be indexed and would inflate the word count by one.
	my @words = split(' ', $data);
	$self->index_map('data', $id, \@words);
	return scalar(@words);
}

sub index_map {
	my ($self, $attribute_name, $id, $data) = @_;



( run in 0.844 second using v1.01-cache-2.11-cpan-39bf76dae61 )