KinoSearch1

 view release on metacpan or  search on metacpan

lib/KinoSearch1/Index/PostingsWriter.pm  view on Meta::CPAN

# Feed every posting of an existing segment into the sort pool in one call.
# $seg_reader supplies the term enumerator and the term-docs object;
# $doc_map is passed through untouched to the XS routine _add_segment.
sub add_segment {
    my $self       = shift;
    my $seg_reader = shift;
    my $doc_map    = shift;

    my $terms    = $seg_reader->terms;
    my $postings = $seg_reader->term_docs;

    # Switch on position reading before the term-docs object is handed over.
    $postings->set_read_positions(1);

    _add_segment( $self->{sort_pool}, $terms, $postings, $doc_map );
}

=for comment

Process all the postings in the sort pool.  Generate the freqs and positions
files.  Hand off data to TermInfosWriter for generating the term
dictionaries.

=cut

sub write_postings {
    my ($self) = @_;
    my $invindex = $self->{invindex};
    my $seg_name = $self->{seg_name};

    # Sort the pool (sort_all) before anything is written.
    $self->{sort_pool}->sort_all;

    my $tinfos_writer = KinoSearch1::Index::TermInfosWriter->new(
        invindex => $invindex,
        seg_name => $seg_name,
    );

    # Output goes to "<seg_name>.frq" and "<seg_name>.prx".  Any file that
    # already exists under either name is deleted before the streams are
    # opened.
    my $frq_file = $seg_name . '.frq';
    my $prx_file = $seg_name . '.prx';
    for my $filename ( $frq_file, $prx_file ) {
        next unless $invindex->file_exists($filename);
        $invindex->delete_file($filename);
    }
    my $frq_out = $invindex->open_outstream($frq_file);
    my $prx_out = $invindex->open_outstream($prx_file);

    # The XS routine receives the sorted pool, the term infos writer and
    # both output streams.
    _write_postings( $self->{sort_pool}, $tinfos_writer, $frq_out, $prx_out );

    # Close both streams first, then finish the term infos writer; its
    # finish() result is what this sub evaluates to.
    $_->close for $frq_out, $prx_out;
    $tinfos_writer->finish;
}

# Close the sort pool held in $self->{sort_pool}.  Whatever close() yields is
# passed back to the caller.
sub finish {
    my ($self) = @_;
    my $sort_pool = $self->{sort_pool};
    return $sort_pool->close;
}

1;

__END__
__XS__

MODULE = KinoSearch1    PACKAGE = KinoSearch1::Index::PostingsWriter      

void
_write_postings (sort_pool, tinfos_writer, frq_out, prx_out)
    SortExternal    *sort_pool;
    TermInfosWriter *tinfos_writer;
    OutStream       *frq_out;
    OutStream       *prx_out;
PPCODE:
    /* Thin glue for the Perl-level write_postings(): forward the four
     * arguments unchanged to the C implementation.  Nothing is pushed onto
     * the Perl stack, so the caller gets an empty list back.
     * NOTE(review): conversion of the Perl objects to these C pointers is
     * presumably handled by the project's typemap -- not visible here. */
    Kino1_PostWriter_write_postings(sort_pool, tinfos_writer, frq_out,
        prx_out);

void
_add_segment(sort_pool, term_enum, term_docs, doc_map_ref)
    SortExternal  *sort_pool;
    SegTermEnum  *term_enum;
    TermDocs *term_docs;
    SV  *doc_map_ref;
PPCODE:
    /* Thin glue for the Perl-level add_segment(): forward the arguments
     * unchanged to the C implementation.  doc_map_ref stays a raw SV* and
     * is interpreted by the C side.  Nothing is pushed onto the Perl stack,
     * so the caller gets an empty list back. */
    Kino1_PostWriter_add_segment(sort_pool, term_enum, term_docs, 
        doc_map_ref);

__H__

#ifndef H_KINOSEARCH_INDEX_POSTINGS_WRITER
#define H_KINOSEARCH_INDEX_POSTINGS_WRITER 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1IndexSegTermEnum.h"
#include "KinoSearch1IndexTerm.h"
#include "KinoSearch1IndexTermDocs.h"
#include "KinoSearch1IndexTermInfosWriter.h"
#include "KinoSearch1StoreOutStream.h"
#include "KinoSearch1UtilByteBuf.h"
#include "KinoSearch1UtilSortExternal.h"

/* C entry point behind the XS sub _write_postings.  Arguments, in order:
 * the sort pool, the term infos writer, the frq output stream and the prx
 * output stream. */
void Kino1_PostWriter_write_postings(SortExternal*, TermInfosWriter*, 
                                    OutStream*, OutStream*);
/* C entry point behind the XS sub _add_segment.  Arguments, in order: the
 * sort pool, the segment's term enumerator, its term docs object and the
 * doc map reference (passed as a raw SV*). */
void Kino1_PostWriter_add_segment(SortExternal*, SegTermEnum*, TermDocs*, SV*);

#endif /* include guard */

__C__

#include "KinoSearch1IndexPostingsWriter.h"

static void Kino1_PostWriter_deserialize(ByteBuf*, ByteBuf*, ByteBuf*, 
                                        U32*, U32*);
static void Kino1_PostWriter_write_positions(OutStream*, ByteBuf*);

void
Kino1_PostWriter_write_postings(SortExternal *sort_pool,
                               TermInfosWriter *tinfos_writer, 
                               OutStream *frq_out, OutStream *prx_out) {
    ByteBuf   *posting           = NULL;
    ByteBuf   *positions, *termstring, *last_termstring;
    TermInfo  *tinfo;
    U32        doc_num           = 0;
    U32        freq              = 0;
    U32        last_doc_num      = 0;
    U32        last_skip_doc     = 0;
    double     frq_ptr, prx_ptr;
    double     last_skip_frq_ptr = 0.0;
    double     last_skip_prx_ptr = 0.0;
    I32        iter              = 0;
    I32        i;
    AV        *skip_data_av;
    SV        *skip_sv;

    posting         = Kino1_BB_new_string("", 0);
    last_termstring = Kino1_BB_new_string("\0\0", 2);
    termstring      = Kino1_BB_new_view(NULL, 0);
    positions       = Kino1_BB_new_view(NULL, 0);
    tinfo           = Kino1_TInfo_new();
    skip_data_av    = newAV();
    skip_sv         = &PL_sv_undef;



( run in 0.688 second using v1.01-cache-2.11-cpan-5511b514fd6 )