KinoSearch1
view release on metacpan or search on metacpan
lib/KinoSearch1/Index/PostingsWriter.pm view on Meta::CPAN
# Bulk-load the postings of one existing segment into the sort pool.
#
# Arguments:
#   $seg_reader - object providing ->terms and ->term_docs
#   $doc_map    - passed through unchanged to the XS routine _add_segment
sub add_segment {
    my ( $self, $seg_reader, $doc_map ) = @_;

    my $enum = $seg_reader->terms;
    my $docs = $seg_reader->term_docs;

    # Switch position reading on for the term-docs object before it is
    # handed to the XS layer.
    $docs->set_read_positions(1);

    return _add_segment( $self->{sort_pool}, $enum, $docs, $doc_map );
}
=for comment
Process all the postings in the sort pool. Generate the freqs and positions
files. Hand off data to TermInfosWriter for generating the term
dictionaries.
=cut
sub write_postings {
    my ($self) = @_;
    my $invindex  = $self->{invindex};
    my $seg_name  = $self->{seg_name};
    my $sort_pool = $self->{sort_pool};

    # Sort the pool first; the XS routine below receives it after sorting.
    $sort_pool->sort_all;

    my $tinfos_writer = KinoSearch1::Index::TermInfosWriter->new(
        invindex => $invindex,
        seg_name => $seg_name,
    );

    # Names of the two per-segment output files written here.
    my $frq_file = $seg_name . '.frq';
    my $prx_file = $seg_name . '.prx';

    # Remove any existing file of the same name before opening for output.
    for my $file ( $frq_file, $prx_file ) {
        next unless $invindex->file_exists($file);
        $invindex->delete_file($file);
    }

    my $frq_out = $invindex->open_outstream($frq_file);
    my $prx_out = $invindex->open_outstream($prx_file);

    # The XS routine receives the pool, the term-infos writer and both
    # output streams.
    _write_postings( $sort_pool, $tinfos_writer, $frq_out, $prx_out );

    # Close both streams, then let the term-infos writer finish up.
    $_->close for $frq_out, $prx_out;
    $tinfos_writer->finish;
}
# Close the sort pool held by this writer.  The result of the pool's
# close method is returned to the caller.
sub finish {
    my ($self) = @_;
    my $sort_pool = $self->{sort_pool};
    return $sort_pool->close;
}
1;
__END__
__XS__
MODULE = KinoSearch1 PACKAGE = KinoSearch1::Index::PostingsWriter
void
_write_postings (sort_pool, tinfos_writer, frq_out, prx_out)
SortExternal *sort_pool;
TermInfosWriter *tinfos_writer;
OutStream *frq_out;
OutStream *prx_out;
PPCODE:
/* XS glue for the Perl-level write_postings: forwards the four arguments
 * unchanged to the C routine Kino1_PostWriter_write_postings.  Nothing is
 * pushed onto the Perl stack here. */
Kino1_PostWriter_write_postings(sort_pool, tinfos_writer, frq_out,
prx_out);
void
_add_segment(sort_pool, term_enum, term_docs, doc_map_ref)
SortExternal *sort_pool;
SegTermEnum *term_enum;
TermDocs *term_docs;
SV *doc_map_ref;
PPCODE:
/* XS glue for the Perl-level add_segment: forwards the four arguments
 * unchanged to the C routine Kino1_PostWriter_add_segment.  doc_map_ref is
 * handed over as a raw SV*.  Nothing is pushed onto the Perl stack here. */
Kino1_PostWriter_add_segment(sort_pool, term_enum, term_docs,
doc_map_ref);
__H__
#ifndef H_KINOSEARCH_INDEX_POSTINGS_WRITER
#define H_KINOSEARCH_INDEX_POSTINGS_WRITER 1
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1IndexSegTermEnum.h"
#include "KinoSearch1IndexTerm.h"
#include "KinoSearch1IndexTermDocs.h"
#include "KinoSearch1IndexTermInfosWriter.h"
#include "KinoSearch1StoreOutStream.h"
#include "KinoSearch1UtilByteBuf.h"
#include "KinoSearch1UtilSortExternal.h"
/* Called from the _write_postings XSUB with, in order: the sort pool, the
 * term-infos writer, the .frq output stream and the .prx output stream. */
void Kino1_PostWriter_write_postings(SortExternal*, TermInfosWriter*,
OutStream*, OutStream*);
/* Called from the _add_segment XSUB with, in order: the sort pool, a segment
 * term enumerator, a term-docs object and the doc map reference (as an SV*). */
void Kino1_PostWriter_add_segment(SortExternal*, SegTermEnum*, TermDocs*, SV*);
#endif /* include guard */
__C__
#include "KinoSearch1IndexPostingsWriter.h"
/* File-local helpers; their definitions are not shown alongside these
 * prototypes.
 * NOTE(review): deserialize presumably splits one serialized posting into
 * term string / positions buffers plus doc number and freq -- confirm
 * against the definition. */
static void Kino1_PostWriter_deserialize(ByteBuf*, ByteBuf*, ByteBuf*,
U32*, U32*);
/* NOTE(review): presumably writes the positions held in the ByteBuf to the
 * given OutStream -- confirm against the definition. */
static void Kino1_PostWriter_write_positions(OutStream*, ByteBuf*);
void
Kino1_PostWriter_write_postings(SortExternal *sort_pool,
TermInfosWriter *tinfos_writer,
OutStream *frq_out, OutStream *prx_out) {
ByteBuf *posting = NULL;
ByteBuf *positions, *termstring, *last_termstring;
TermInfo *tinfo;
U32 doc_num = 0;
U32 freq = 0;
U32 last_doc_num = 0;
U32 last_skip_doc = 0;
double frq_ptr, prx_ptr;
double last_skip_frq_ptr = 0.0;
double last_skip_prx_ptr = 0.0;
I32 iter = 0;
I32 i;
AV *skip_data_av;
SV *skip_sv;
posting = Kino1_BB_new_string("", 0);
last_termstring = Kino1_BB_new_string("\0\0", 2);
termstring = Kino1_BB_new_view(NULL, 0);
positions = Kino1_BB_new_view(NULL, 0);
tinfo = Kino1_TInfo_new();
skip_data_av = newAV();
skip_sv = &PL_sv_undef;
( run in 0.688 second using v1.01-cache-2.11-cpan-5511b514fd6 )