Apache-Wyrd
view release on metacpan or search on metacpan
Wyrd/Services/MySQLIndex.pm view on Meta::CPAN
# my @tables = map {"$_ write"} @{$self->tables};
# my $clause = join ', ', @tables;
# my $sh = $db->prepare('lock tables ' . $clause);
# $sh->execute;
# if ($sh->err) {
# $self->set_error($sh->errstr);
# } else {
$self->{'db'} = $db;
$self->newstatus('W');
# }
return;
}
# recover_db - retained only as an obsolete entry point.
# All work is delegated to obsolete(), which is defined outside this view.
# The `&obsolete;` call form (ampersand, no parentheses) hands the current
# @_ straight through, so obsolete() sees the same arguments (including the
# invocant) that recover_db received.
# NOTE(review): presumably obsolete() reports that this method is no longer
# supported by this index class -- confirm against its definition.
sub recover_db {
&obsolete;
}
# close_db - drop the cached database handle and mark the index closed.
#
# Does nothing when the status is already 'C'.  Otherwise the 'db' slot is
# cleared and the status becomes 'C'.  Always returns nothing.
#
# The explicit "unlock tables" statement that used to be issued for a handle
# in write status ('W') is disabled in this version, matching the disabled
# "lock tables" step on the write side.
sub close_db {
    my $self = shift;
    unless ($self->{'status'} eq 'C') {
        @{$self}{'db', 'status'} = (undef, 'C');
    }
    return;
}
# update_entry($entry) - bring the index record for $entry up to date.
#
# $entry must be an object implementing no_index, index_name,
# index_timestamp, index_digest and index_data.  It may also implement
# index_runtime_flags, index_title, index_keywords, index_description and,
# for extended attributes, index_<attribute> / handle_<attribute>.
#
# Flow, as far as this listing shows:
#   1. validate the entry and its name (problems go through set_error and
#      check_error);
#   2. if the entry asks not to be indexed, purge any existing record;
#   3. unless force_update is set, compare the stored timestamp and then the
#      stored digest so unchanged entries are skipped cheaply;
#   4. otherwise purge the old record, insert a fresh row and index its data
#      and extended attributes.
#
# The early exits return a short human-readable status string.
# NOTE(review): the remainder of this sub lies beyond the visible listing,
# so its final return value is not documented here.
sub update_entry {
    my ($self, $entry) = @_;

    #Make sure the object being sent to this update is a valid one.
    #(set_error is a method call; assigning to it never recorded the message.)
    $self->set_error("Index entries must be objects ") unless (ref($entry));
    #Stop before any method is invoked on something that is not a reference.
    $self->check_error;

    #localize debug value so that the entry can modify it.
    my $debug = $self->debug;
    $self->{'runtime_flags'} = {};
    if (UNIVERSAL::can($entry, 'index_runtime_flags')) {
        #Record every flag the entry requests.  The previous map used "=>"
        #where "=" was meant, so no flag was ever stored.
        foreach my $flag (token_parse($entry->index_runtime_flags)) {
            $self->{'runtime_flags'}->{$flag} = 1;
        }
        $debug = 1 if ($self->{'runtime_flags'}->{'debug'});
        $debug = 0 if ($self->{'runtime_flags'}->{'nodebug'});
    }

    #UNIVERSAL::can is safe on unblessed references, unlike $entry->can.
    foreach my $function (qw/no_index index_name index_timestamp index_digest index_data/) {
        $self->set_error("Index entries must implement the method $function\(\)") unless (UNIVERSAL::can($entry, $function));
    }
    #Report missing methods before trying to call one of them.
    $self->check_error;

    #check that the name is OK
    my $name = $entry->index_name;
    $self->set_error("Index entries must return non-null for method index_name()") unless ($name);
    $self->check_error;
    $self->set_error("<DELETED> is an invalid name for index entries ") if ($name eq '<DELETED>');
    $self->set_error($name . " is an invalid name for index entries ") if ($name =~ /^.%/s);
    $self->check_error;

    #everything OK? Start the DB handle and check that it is supposed to be indexed.
    $self->read_db;
    my ($id, $not_found_flag) = $self->get_id($name);

    #If this entry has been set not to index, make sure it is not in the index and return.
    if ($entry->no_index) {
        if ($not_found_flag) {
            #if key is not found
            return "yes to no_index and not indexed.";
        }
        $self->write_db;
        my $result = $self->purge_entry($id);
        $self->close_db;
        return $result;
    }
    $debug && warn $name . " is new" if ($not_found_flag);

    my $current_timestamp = undef; #lexically scoped to reduce multiple timestamp calculations
    my $current_digest = undef; #lexically scoped to reduce multiple digest calculations
    unless ($entry->force_update) {
        my $sh = $self->db->prepare("select id, timestamp, digest from _wyrd_index where name=?");
        $sh->execute($name);
        my ($id, $timestamp, $digest) = @{$sh->fetchrow_arrayref || []};
        $current_timestamp = $entry->index_timestamp;
        $debug && warn "Comparing timestamps: $timestamp <-> " . $current_timestamp . " for " . $name;
        if ($timestamp eq $current_timestamp) {
            $debug && warn "No update needed. Timestamp is $timestamp." ;
            return "No update needed. Timestamp is $timestamp." ;
        }
        if ($timestamp) {
            #Timestamp was found and is different, so calculate an sha1 fingerprint and see if there really
            #has been a change.
            $current_digest = $entry->index_digest;
            $debug && warn "Comparing digests: $digest <-> " . $current_digest . " for " . $name;
            if ($digest eq $current_digest) {
                $self->write_db;
                $self->update_key($id, 'timestamp', $current_timestamp);
                $self->close_db;
                $debug && warn "Updated timestamp only, since digest was identical.";
                return "Updated timestamp only, since digest was identical.";
            }
        }
    }

    #We are sure the object's entry is out-of-date, so it's time to update.
    $self->write_db;
    #TODO: Add a new way of handling transactions
    my %entry = ();
    $self->purge_entry($id) unless ($not_found_flag); #necessary to clear out words which will not match
    $entry{'name'} = $name;
    #On a forced update the comparison block above is skipped, so fetch the
    #timestamp here (as the digest line already does) instead of storing NULL.
    $entry{'timestamp'} = $current_timestamp || $entry->index_timestamp;
    $entry{'digest'} = $current_digest || $entry->index_digest;
    $entry{'title'} = $entry->index_title if ($entry->can('index_title'));
    $entry{'keywords'} = $entry->index_keywords if ($entry->can('index_keywords'));
    $entry{'description'} = $entry->index_description if ($entry->can('index_description'));

    #keys and values walk the unmodified hash in the same order, so the
    #column list and the quoted value list line up.
    my $field_clause = '(' . join(', ', keys %entry) . ')';
    my $value_clause = '('
        . join(
            ', ',
            (
                map {$self->db->quote($_)} values %entry
            )
        )
        . ')';
    my $sh = $self->db->prepare("insert into _wyrd_index $field_clause values $value_clause");
    $sh->execute;
    $id = $sh->{'mysql_insertid'};
    if ($sh->err) {
        $self->set_error($sh->errstr);
    } else {
        $self->process_html($id, $entry->index_data);
        if ($self->extended) {
            my @attributes = @{$self->attribute_list};
            #Drop the first eight attributes from the list.
            #NOTE(review): presumably these are the built-in columns handled above -- confirm.
            splice(@attributes, 0, 8);
            foreach my $attribute (@attributes) {
                my $value = undef;
                if ($entry->can("index_$attribute")) {
                    #Dynamic method name inside a block eval: traps die like the
                    #old string eval did, without compiling code from a string.
                    my $getter = "index_$attribute";
                    $value = eval { $entry->$getter };
                    $self->set_error($@) if ($@);
                    $self->check_error;
                } elsif (exists($entry->{$attribute})) {
                    $value = $entry->{$attribute};
                }
                if ($entry->can("handle_$attribute")) {
                    my $handler = "handle_$attribute";
                    eval { $entry->$handler($id, $value) };
                    $self->set_error($@) if ($@);
                    $self->check_error;
                } else {
                    if ($self->maps->{$attribute}) {
                        if (defined($value)) {
                            $self->index_map($attribute, $id, [token_parse(lc($value))]);
                        }
                    }
                    if (defined($value)) {
                        $self->update_key($id, $attribute, $value);
                    }
                }
            }
        }
    }
    #TODO: Deal with failed update transactions here.
Wyrd/Services/MySQLIndex.pm view on Meta::CPAN
if (ref($id) eq 'ARRAY') {
unless (scalar(@$id)) {
$debug && warn "get_entry() was passed an empty array, aborting.";
return;
}
$in_clause = join ', ', @$id;
$in_clause = qq{in ($in_clause)};
} else {
unless (defined($id) and not(ref($id))) {
if (ref($id)) {
$debug && warn "get_entry was passed an invalid reference, aborting.";
} else {
$debug && warn "get_entry was passed an undefined value, aborting.";
}
return;
}
$in_clause = qq{='$id'};
}
$params = {} unless (ref($params) eq 'HASH');
my @attributes = @{$self->attribute_list};
my %skip = map {$_ => 1} (@{$params->{'skip'} || []}, @{$self->map_list}, 'name', 'id');
@attributes = grep {!$skip{$_}} @attributes;
if ($params->{'limit'}) {
my %limit = map {$_ => 1} @{$params->{'limit'}};
@attributes = grep {$limit{$_}} @attributes;
}
if ($params->{'require'}) {
my %unique = ();
@attributes = grep {$unique{$_}++ == 0} (@attributes, @{$params->{'require'}});
}
my $attributes = join (", ", @attributes);
$self->read_db;
my $sh = $self->db->prepare("select id, name, $attributes from _wyrd_index where id $in_clause");
$sh->execute;
if ($sh->err) {
$self->set_error($sh->errstr);
}
my @entries = ();
while(my $data_ref = $sh->fetchrow_hashref) {
#copy off the data to a hash
my %entry = %$data_ref;
push @entries, \%entry;
}
$self->close_db;
if (wantarray) {
return @entries;
} else {
return $entries[0];
}
}
# get_id($name) - look up the numeric id stored for an entry name.
#
# Runs "select id from _wyrd_index where name=?" with $name bound.  A
# statement error is recorded through set_error, and the fetch is still
# attempted afterwards.
#
# Returns ($id, $not_found) in list context and just $id otherwise.
# $not_found is 1 when no true id came back and undef when one did.
sub get_id {
    my ($self, $name) = @_;

    my $query = $self->db->prepare('select id from _wyrd_index where name=?');
    $query->execute($name);
    $self->set_error($query->errstr) if ($query->err);

    my $row = $query->fetchrow_arrayref;
    my $id = $row->[0];
    my $not_found = $id ? undef : 1;

    return wantarray ? ($id, $not_found) : $id;
}
# get_value($id, $attribute) - fetch one column of one index row.
#
# $attribute is interpolated directly into the SQL as a column name, so it
# must be a trusted identifier; only $id is passed as a bound parameter.
# A statement error is recorded through set_error.
#
# Returns the first column of the first row fetched (undef when no row).
sub get_value {
    my ($self, $id, $attribute) = @_;

    my $query = $self->db->prepare("select $attribute from _wyrd_index where id=?");
    $query->execute($id);
    $self->set_error($query->errstr) if ($query->err);

    my $row = $query->fetchrow_arrayref;
    my $value = $row->[0];
    return $value;
}
# update_key($id, $attribute, $value) - set one column on one index row.
#
# $attribute is interpolated directly into the SQL as a column name, so it
# must be a trusted identifier; $value and $id are bound parameters.
# A statement error is recorded through set_error.  Returns nothing.
sub update_key {
    my ($self, $id, $attribute, $value) = @_;

    my $statement = $self->db->prepare("update _wyrd_index set $attribute=? where id=?");
    $statement->execute($value, $id);
    $self->set_error($statement->errstr) if ($statement->err);

    return;
}
# delete_key - retained only as an obsolete entry point.
# All work is delegated to obsolete(), which is defined outside this view.
# The `&obsolete;` call form (ampersand, no parentheses) hands the current
# @_ straight through, so obsolete() sees the same arguments (including the
# invocant) that delete_key received.
# NOTE(review): presumably obsolete() reports that this method is no longer
# supported by this index class -- confirm against its definition.
sub delete_key {
&obsolete;
}
# process_html($id, $data) - store and index the text body of an entry.
#
# Skipped entirely when the 'no_data' runtime flag is set.  Otherwise the
# data is passed through clean_html, saved in the 'data' column, handed to
# index_words, and the count that index_words returns is saved in the
# 'wordcount' column.  Returns nothing.
sub process_html {
    my ($self, $id, $data) = @_;

    unless ($self->{'runtime_flags'}->{'no_data'}) {
        #Remove all punctuation noise from the data
        my $cleaned = $self->clean_html($data);
        $self->update_key($id, 'data', $cleaned);

        my $total = $self->index_words($id, $cleaned);
        $self->update_key($id, 'wordcount', $total);
    }

    return;
}
# extract_html - retained only as an obsolete entry point.
# All work is delegated to obsolete(), which is defined outside this view.
# The `&obsolete;` call form (ampersand, no parentheses) hands the current
# @_ straight through, so obsolete() sees the same arguments (including the
# invocant) that extract_html received.
# NOTE(review): presumably obsolete() reports that this method is no longer
# supported by this index class -- confirm against its definition.
sub extract_html {
&obsolete;
}
# index_words($id, $data) - split text into words and index them.
#
# The words are handed to index_map under the 'data' attribute for entry
# $id.  Returns the number of words found.
#
# The split uses the special ' ' pattern, which skips leading whitespace.
# A /\s+/ pattern yields an empty first field when the text starts with
# whitespace; that inflated the word count by one and sent an empty "word"
# to index_map.  Undefined data is treated as empty text.
sub index_words {
    my ($self, $id, $data) = @_;
    $data = '' unless defined $data;

    # Split text into Array of words
    my @words = split(' ', $data);
    $self->index_map('data', $id, \@words);

    return scalar(@words);
}
sub index_map {
my ($self, $attribute_name, $id, $data) = @_;
( run in 0.844 second using v1.01-cache-2.11-cpan-39bf76dae61 )