DBIx-TextIndex

 view release on metacpan or  search on metacpan

lib/DBIx/TextIndex.pm  view on Meta::CPAN

# Write the buffered per-document weights and inverted-index entries held
# on $self to the index database inside a single transaction.
#
# Arguments:
#   $added_ids - optional array ref of document ids being committed;
#                defaults to $self->{ADDED_IDS}. Its last element is passed
#                to max_indexed_id() as the new maximum.
#
# Reads:  $self->{NEW_W_D}, {W_D}, {TERM_DOCS_VINT}, {TERM_POS}, {DOCFREQ_T},
#         {DOC_FIELDS}, {INVERTED_TABLES}, {DBD_TYPE}, {MAX_INDEXED_ID}
# Writes: $self->{W_D}, {AVG_W_D}; deletes {NEW_W_D} and the per-field
#         {TERM_DOCS_VINT}, {TERM_POS}, {DOCFREQ_T} buffers as they are stored.
#
# Returns the value of the final $dbh->commit call.
#
# Dies with the original error if any step inside the transaction throws;
# in that case the transaction is rolled back first so the handle is not
# left inside an open transaction.
# NOTE: the in-memory buffers already consumed before the failure are not
# restored by the rollback.
sub _commit_docs {
    my $self = shift;

    my $added_ids = shift || $self->{ADDED_IDS};

    # First id of the newly added range: one past the old max_indexed_id.
    my $id_a = $self->max_indexed_id() + 1;
    $self->max_indexed_id($added_ids->[-1]);
    $self->all_doc_ids($added_ids);

    # Last id of the newly added range, read back after the update above.
    my $id_b = $self->{MAX_INDEXED_ID};

    _log("Storing doc weights\n") if $PA;

    $self->_fetch_docweights(1);

    my $dbh = $self->{INDEX_DBH};

    # begin_work stays outside the eval: if it fails we did not open a
    # transaction and must not roll back one that belongs to someone else.
    $dbh->begin_work;

    # Weights, term positions and doc frequencies may legitimately be
    # undef for sparse ids / new terms; treat them as 0 or ''.
    no warnings qw(uninitialized);

    my $is_sqlite = $self->{DBD_TYPE} eq 'SQLite';

    my $commit_rv;
    my $ok = eval {
        my $sth = $dbh->prepare($self->{DB}->update_docweights);

        foreach my $fno ( 0 .. $#{$self->{DOC_FIELDS}} ) {
            my @w_d;
            if ($#{$self->{W_D}->[$fno]} >= 0) {
                # Existing weights: overlay only the newly added id range.
                @w_d = @{$self->{W_D}->[$fno]};
                @w_d[$id_a .. $id_b] =
                    @{$self->{NEW_W_D}->[$fno]}[$id_a .. $id_b];
            }
            else {
                @w_d = @{$self->{NEW_W_D}->[$fno]};
            }

            my $sum = 0;
            $sum += $_ for @w_d;

            # FIXME: use actual doc count instead of max_indexed_id
            # Guard the division: a zero/undef max id (nothing indexed)
            # would otherwise die with "Illegal division by zero".
            my $avg_w_d = $id_b ? $sum / $id_b : 0;

            $w_d[0] = 0 unless defined $w_d[0];

            # FIXME: this takes too much space, use a float compression method
            my $packed_w_d = pack 'f*', @w_d;
            $self->{DB}->update_docweights_execute(
                $sth, $fno, $avg_w_d, $packed_w_d);

            # Set AVG_W_D and W_D cached values to new value, in case same
            # instance is used for search immediately after adding to index
            $self->{AVG_W_D}->[$fno] = $avg_w_d;
            $self->{W_D}->[$fno]     = \@w_d;
        }

        $sth->finish;

        # Delete temporary in-memory structure
        delete($self->{NEW_W_D});

        _log("Committing inverted tables to database\n") if $PA;

        foreach my $fno ( 0 .. $#{$self->{DOC_FIELDS}} ) {

            _log("field$fno ", scalar keys %{$self->{TERM_DOCS_VINT}->[$fno]},
                 " distinct terms\n") if $PA;

            my $s_sth;

            # SQLite chokes with "database table is locked" unless s_sth
            # is finished before i_sth->execute, so for SQLite the select
            # handle is prepared and finished per term inside the loop.
            unless ($is_sqlite) {
                $s_sth = $dbh->prepare(
                    $self->{DB}->inverted_select(
                        $self->{INVERTED_TABLES}->[$fno] ) );
            }
            my $i_sth = $dbh->prepare(
                $self->{DB}->inverted_replace(
                    $self->{INVERTED_TABLES}->[$fno] ) );

            my $tc = 0;

            # Entries are deleted while iterating; removing the key most
            # recently returned by each() is safe.
            while (my ($term, $term_docs_vint) =
                   each %{$self->{TERM_DOCS_VINT}->[$fno]}) {

                _log("$term\n") if $PA >= 2;
                if ($PA && $tc > 0) {
                    _log("committed $tc terms\n") if $tc % 500 == 0;
                }

                # Defaults used when the term has no existing row
                # (fetch then leaves the bound variables untouched).
                my $o_docfreq_t = 0;
                my $o_term_docs = '';
                my $o_term_pos  = '';

                if ($is_sqlite) {
                    $s_sth = $dbh->prepare(
                        $self->{DB}->inverted_select(
                            $self->{INVERTED_TABLES}->[$fno] ) );
                }
                $s_sth->execute($term);
                $s_sth->bind_columns(
                    \$o_docfreq_t, \$o_term_docs, \$o_term_pos);
                $s_sth->fetch;
                $s_sth->finish if $is_sqlite;

                # Append the new postings / positions to the stored ones.
                my $term_docs = pack_term_docs_append_vint($o_term_docs,
                                                           $term_docs_vint);

                my $term_pos =
                    $o_term_pos . $self->{TERM_POS}->[$fno]->{$term};

                $self->{DB}->inverted_replace_execute(
                    $i_sth,
                    $term,
                    $self->{DOCFREQ_T}->[$fno]->{$term} + $o_docfreq_t,
                    $term_docs,
                    $term_pos,
                );

                delete($self->{TERM_DOCS_VINT}->[$fno]->{$term});
                delete($self->{TERM_POS}->[$fno]->{$term});
                $tc++;
            }
            $i_sth->finish if $is_sqlite;
            _log("committed $tc terms\n") if $PA && $tc > 0;

            # Flush temporary hashes after data is stored
            delete($self->{TERM_DOCS_VINT}->[$fno]);
            delete($self->{TERM_POS}->[$fno]);
            delete($self->{DOCFREQ_T}->[$fno]);
        }

        $commit_rv = $dbh->commit;
        1;
    };

    unless ($ok) {
        # Save the error before anything else can clobber $@.
        my $err = $@ || "unknown error while committing docs\n";
        # Best-effort rollback; never let it mask the original failure.
        eval { $dbh->rollback };
        die $err;
    }

    return $commit_rv;
}

sub _add_to_delete_queue {



( run in 2.536 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )