DBIx-TextIndex
view release on metacpan or search on metacpan
lib/DBIx/TextIndex.pm view on Meta::CPAN
# Write the in-memory index data collected for newly added documents to the
# index database: first the per-field document weights, then the per-field
# inverted (term) tables. All writes are done inside a single transaction
# opened with begin_work and closed with commit.
#
# Arguments:
#   $added_ids - optional array ref of the document ids being committed;
#                defaults to $self->{ADDED_IDS}. Its last element is passed
#                to max_indexed_id() and the whole ref to all_doc_ids().
#
# Returns:
#   The return value of the database handle's commit call.
#
# Side effects:
#   - $self->{AVG_W_D} and $self->{W_D} are replaced per field with the
#     newly stored values.
#   - $self->{NEW_W_D} is deleted, and the per-field TERM_DOCS_VINT,
#     TERM_POS and DOCFREQ_T entries are deleted once stored.
#
# Errors:
#   If anything inside the transaction dies, a rollback is attempted and the
#   original error is re-thrown. In-memory state that was already changed
#   (max_indexed_id, cached weights, flushed term hashes) is NOT restored.
sub _commit_docs {
    my $self = shift;
    my $added_ids = shift || $self->{ADDED_IDS};

    my $dbh = $self->{INDEX_DBH};

    # First weight slot belonging to the newly added documents, i.e. one
    # past the max_indexed_id that was in effect before this commit.
    my $id_a = $self->max_indexed_id + 1;
    $self->max_indexed_id($added_ids->[-1]);
    $self->all_doc_ids($added_ids);

    # Last weight slot to copy; read back after max_indexed_id was updated.
    my $id_b = $self->{MAX_INDEXED_ID};

    _log("Storing doc weights\n") if $PA;
    $self->_fetch_docweights(1);

    $dbh->begin_work;

    my $commit_rv;
    my $ok = eval {
        my $sth = $dbh->prepare($self->{DB}->update_docweights);

        # Weight slots and per-term counters may legitimately be undef
        # below; treat them as 0 / '' without warning.
        no warnings qw(uninitialized);

        my $is_sqlite = $self->{DBD_TYPE} eq 'SQLite';

        foreach my $fno ( 0 .. $#{$self->{DOC_FIELDS}} ) {
            my @w_d;
            if ($#{$self->{W_D}->[$fno]} >= 0) {
                # Start from the weights already held in W_D and overlay
                # only the slots of the newly added documents.
                @w_d = @{$self->{W_D}->[$fno]};
                @w_d[$id_a .. $id_b] =
                    @{$self->{NEW_W_D}->[$fno]}[$id_a .. $id_b];
            } else {
                @w_d = @{$self->{NEW_W_D}->[$fno]};
            }

            my $sum = 0;
            foreach (@w_d) {
                $sum += $_;
            }

            # FIXME: use actual doc count instead of max_indexed_id
            # Guard against a zero/undefined max id: dividing would die
            # with "Illegal division by zero" in the middle of the
            # transaction.
            my $avg_w_d = $id_b ? $sum / $id_b : 0;

            # Slot 0 must be defined before packing.
            # NOTE(review): presumably doc ids start at 1 so slot 0 is
            # unused -- confirm.
            $w_d[0] = 0 unless defined $w_d[0];

            # FIXME: this takes too much space, use a float compression
            # method
            # 'f*' packs every weight as a native single-precision float.
            my $packed_w_d = pack 'f*', @w_d;
            $self->{DB}->update_docweights_execute(
                $sth, $fno, $avg_w_d, $packed_w_d);

            # Set AVG_W_D and W_D cached values to new value, in case same
            # instance is used for search immediately after adding to index
            $self->{AVG_W_D}->[$fno] = $avg_w_d;
            $self->{W_D}->[$fno] = \@w_d;
        }
        $sth->finish;

        # Delete temporary in-memory structure
        delete($self->{NEW_W_D});

        _log("Committing inverted tables to database\n") if $PA;
        foreach my $fno ( 0 .. $#{$self->{DOC_FIELDS}} ) {
            _log("field$fno ", scalar keys %{$self->{TERM_DOCS_VINT}->[$fno]},
                 " distinct terms\n") if $PA;

            my $s_sth;
            # SQLite chokes with "database table is locked" unless s_sth
            # is finished before i_sth->execute, so for SQLite the select
            # handle is prepared (and finished) per term inside the loop;
            # every other driver prepares it once per field here.
            unless ($is_sqlite) {
                $s_sth = $dbh->prepare(
                    $self->{DB}->inverted_select(
                        $self->{INVERTED_TABLES}->[$fno] ) );
            }
            my $i_sth = $dbh->prepare(
                $self->{DB}->inverted_replace(
                    $self->{INVERTED_TABLES}->[$fno] ) );

            my $tc = 0;
            # Deleting the entry most recently returned by each() is safe,
            # which is the only deletion done on this hash inside the loop.
            while (my ($term, $term_docs_vint) =
                       each %{$self->{TERM_DOCS_VINT}->[$fno]}) {
                _log("$term\n") if $PA >= 2;
                if ($PA && $tc > 0) {
                    _log("committed $tc terms\n") if $tc % 500 == 0;
                }

                # Previously stored values for this term; they keep these
                # defaults when the select returns no row.
                my $o_docfreq_t = 0;
                my $o_term_docs = '';
                my $o_term_pos = '';

                $s_sth = $dbh->prepare( $self->{DB}->inverted_select(
                    $self->{INVERTED_TABLES}->[$fno]) )
                    if $is_sqlite;
                $s_sth->execute($term);
                $s_sth->bind_columns(
                    \$o_docfreq_t, \$o_term_docs, \$o_term_pos);
                $s_sth->fetch;
                $s_sth->finish if $is_sqlite;

                # Append the new postings and positions to what was
                # already stored, and add the document frequencies.
                my $term_docs = pack_term_docs_append_vint($o_term_docs,
                                                           $term_docs_vint);
                my $term_pos =
                    $o_term_pos . $self->{TERM_POS}->[$fno]->{$term};

                $self->{DB}->inverted_replace_execute(
                    $i_sth,
                    $term,
                    $self->{DOCFREQ_T}->[$fno]->{$term} + $o_docfreq_t,
                    $term_docs,
                    $term_pos,
                );

                delete($self->{TERM_DOCS_VINT}->[$fno]->{$term});
                delete($self->{TERM_POS}->[$fno]->{$term});
                $tc++;
            }
            $i_sth->finish if $is_sqlite;
            _log("committed $tc terms\n") if $PA && $tc > 0;

            # Flush temporary hashes after data is stored
            delete($self->{TERM_DOCS_VINT}->[$fno]);
            delete($self->{TERM_POS}->[$fno]);
            delete($self->{DOCFREQ_T}->[$fno]);
        }

        $commit_rv = $dbh->commit;
        1;
    };

    unless ($ok) {
        # Capture the error before the rollback attempt can clobber $@.
        my $err = $@ || "unknown error while committing docs\n";
        # Best effort: do not leave the handle inside an open transaction.
        eval { $dbh->rollback };
        die $err;
    }

    return $commit_rv;
}
sub _add_to_delete_queue {
( run in 2.536 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )