Apache-Wyrd

 view release on metacpan or  search on metacpan

Wyrd/Services/MySQLIndex.pm  view on Meta::CPAN

	if ($self->status eq 'R') {
		return $self->db;
	} elsif ($self->status eq 'W') {
		$self->close_db;
	}
	my $db = $self->dbh;
# 	my @tables = map {"$_ read"} @{$self->tables};
# 	my $clause = join ', ', @tables;
# 	my $sh = $db->prepare('lock tables ' . $clause);
# 	$sh->execute;
# 	if ($sh->err) {
# 		$self->set_error($sh->errstr);
# 	} else {
		$self->{'db'} = $db;
		$self->newstatus('R');
#	}
	return;
}

sub write_db {
	my ($self) = @_;
	if ($self->status eq 'W') {
		return $self->db;
 	} elsif ($self->status eq 'R') {
		$self->close_db;
	}
	my $db = $self->dbh;
# 	my @tables = map {"$_ write"} @{$self->tables};
# 	my $clause = join ', ', @tables;
# 	my $sh = $db->prepare('lock tables ' . $clause);
# 	$sh->execute;
# 	if ($sh->err) {
# 		$self->set_error($sh->errstr);
# 	} else {
		$self->{'db'} = $db;
		$self->newstatus('W');
# 	}
	return;
}

sub recover_db {
	&obsolete;
}

sub close_db {
	my ($self) = @_;
	return if ($self->{'status'} eq 'C');
# 	if ($self->{'status'} eq 'W') {
# 		my $db = $self->{'db'};
# 		my $sh = $db->prepare('unlock tables');
# 		$sh->execute;
# 		if ($sh->err) {
# 			$self->set_error($sh->errstr);
# 		}
# 	}
	$self->{'db'} = undef;
	$self->{'status'} = 'C';
	return;
}

sub update_entry {
	my ($self, $entry) = @_;

	#Make sure the object being sent to this update is a valid one.
	$self->set_error = "Index entries must be objects " unless (ref($entry));
	#localize debug value so that the entry can modify it.
	my $debug = $self->debug;
	$self->{'runtime_flags'} = {};
	if (UNIVERSAL::can($entry, 'index_runtime_flags')) {
		map {$self->{'runtime_flags'}->{$_} => 1} token_parse($entry->index_runtime_flags);
		$debug = 1 if ($self->{'runtime_flags'}->{'debug'});
		$debug = 0 if ($self->{'runtime_flags'}->{'nodebug'});
	}
	foreach my $function (qw/no_index index_name index_timestamp index_digest index_data/) {
		$self->set_error("Index entries must implement the method $function\(\)") unless ($entry->can($function));
	}

	#check that the name is OK
	my $name = $entry->index_name;
	$self->set_error("Index entries must return non-null for method index_name()") unless ($name);
	$self->check_error;
	$self->set_error("<DELETED> is an invalid name for index entries ") if ($name eq '<DELETED>');
	$self->set_error($name . " is an invalid name for index entries ") if ($name =~ /^.%/s);
	$self->check_error;

	#everything OK?  Start the DB handle and check that it is supposed to be indexed.
	$self->read_db;
	my ($id, $not_found_flag) = $self->get_id($name);

	#If this entry has been set not to index, make sure it is not in the index and return.
	if ($entry->no_index) {
		if ($not_found_flag) {
			#if key is not found
			return "yes to no_index and not indexed.";
		}
		$self->write_db;
		my $result = $self->purge_entry($id);
		$self->close_db;
		return $result;
	}

	$debug && warn $name . " is new" if ($not_found_flag);
	my $current_timestamp = undef; #lexically scoped to reduce multiple timestamp calculations
	my $current_digest = undef; #lexically scoped to reduce multiple digest calculations
	unless ($entry->force_update) {
		my $sh = $self->db->prepare("select id, timestamp, digest from _wyrd_index where name=?");
		$sh->execute($name);
		my ($id, $timestamp, $digest) = @{$sh->fetchrow_arrayref || []};
		$current_timestamp = $entry->index_timestamp;
		$debug && warn "Comparing timestamps: $timestamp <-> " . $current_timestamp . " for " . $name;
		if ($timestamp eq $current_timestamp) {
			$debug && warn "No update needed.  Timestamp is $timestamp." ;
			return "No update needed.  Timestamp is $timestamp." ;
		}
		if ($timestamp) {
			#Timestamp was found and is different, so calculate an sha1 fingerprint and see if there really
			#has been a change.
			$current_digest = $entry->index_digest;
			$debug && warn "Comparing digests: $digest <-> " . $current_digest . " for " . $name;
			if ($digest eq $current_digest) {
				$self->write_db;

Wyrd/Services/MySQLIndex.pm  view on Meta::CPAN

		my %limit = map {$_ => 1} @{$params->{'limit'}};
		@attributes = grep {$limit{$_}} @attributes;
	}
	if ($params->{'require'}) {
		my %unique = ();
		@attributes = grep {$unique{$_}++ == 0} (@attributes, @{$params->{'require'}});
	}

	my $attributes = join (", ", @attributes);
	$self->read_db;
	my $sh = $self->db->prepare("select id, name, $attributes from _wyrd_index where id $in_clause");
	$sh->execute;
	if ($sh->err) {
		$self->set_error($sh->errstr);
	}
	my @entries = ();
	while(my $data_ref = $sh->fetchrow_hashref) {
		#copy off the data to a hash
		my %entry = %$data_ref;
		push @entries, \%entry;
	}
	$self->close_db;
	if (wantarray) {
		return @entries;
	} else {
		return $entries[0];
	}
}

sub get_id {
	my ($self, $name) = @_;
	my $sh = $self->db->prepare('select id from _wyrd_index where name=?');
	$sh->execute($name);
	if ($sh->err) {
		$self->set_error($sh->errstr);
	}
	my $not_found = undef;
	my $data_ref = $sh->fetchrow_arrayref;
	my $id = $data_ref->[0];
	unless ($id) {
		$not_found = 1;
	}
	if (wantarray) {
		return ($id, $not_found);
	}
	return $id;
}

sub get_value {
	my ($self, $id, $attribute) = @_;
	my $sh = $self->db->prepare("select $attribute from _wyrd_index where id=?");
	$sh->execute($id);
	if ($sh->err) {
		$self->set_error($sh->errstr);
	}
	my $data_ref = $sh->fetchrow_arrayref;
	my $value = $data_ref->[0];
	return $value;
}

sub update_key {
	my ($self, $id, $attribute, $value) = @_;
	my $sh = $self->db->prepare("update _wyrd_index set $attribute=? where id=?");
	$sh->execute($value, $id);
	if ($sh->err) {
		$self->set_error($sh->errstr);
	}
	return;
}

sub delete_key {
	&obsolete;
}

sub process_html {
	my ($self, $id, $data) = @_;

	return if ($self->{'runtime_flags'}->{'no_data'});

	#Remove all punctuation noise from the data
	$data = $self->clean_html($data);

	$self->update_key($id, 'data', $data);
	my $wordcount = $self->index_words($id, $data);
	$self->update_key($id, 'wordcount', $wordcount);

	return;
}

sub extract_html {
	&obsolete;
}

sub index_words {
	my ($self, $id, $data) = @_;
	# Split text into Array of words
	my (@words) = split(/\s+/, $data);
	$self->index_map('data', $id, \@words);
	return scalar(@words);
}

sub index_map {
	my ($self, $attribute_name, $id, $data) = @_;

	my $debug = $self->debug;
	$debug = 1 if ($self->{'runtime_flags'}->{'debug'});

	my $table = '_wyrd_index_' . $attribute_name;
	$debug && warn "mapping $id - $attribute_name : " . Dumper($data);

	my (%unique, $item, @items) = (); # for unique-ifying word list

	#remove duplicates if necessary
	if (ref($data) eq 'ARRAY') {
		@items = grep { $unique{$_}++ == 0 } @$data;
	} elsif (ref($data) eq 'HASH') {
		#IMPORTANT: %unique is lexically scoped out of this point in order to
		#use it to hold data counts below.
		%unique = %$data;
		@items = keys(%unique);
	} else {



( run in 0.470 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )