KinoSearch1

 view release on metacpan or  search on metacpan

lib/KinoSearch1/Index/SegWriter.pm  view on Meta::CPAN

    }
}

# Finish writing the segment: flush postings, write the FieldInfos file, shut
# down the sub-writers, pack the segment's files into a compound file (only
# when docs were added), and delete the files that are no longer needed.
sub finish {
    my $self     = shift;
    my $invindex = $self->{invindex};
    my $seg_name = $self->{seg_name};

    # Flush the term dictionary and positions.
    $self->{postings_writer}->write_postings;

    # Write FieldInfos, replacing any file already present under that name.
    my $fnm_file = "$seg_name.fnm";
    if ( $invindex->file_exists($fnm_file) ) {
        $invindex->delete_file($fnm_file);
    }
    my $finfos_outstream = $invindex->open_outstream($fnm_file);
    $self->{finfos}->write_infos($finfos_outstream);
    $finfos_outstream->close;

    # Shut down all the writers so the files they produced can be opened.
    $self->{postings_writer}->finish;
    $self->{fields_writer}->finish;
    for my $norm_outstream ( @{ $self->{norm_outstreams} } ) {
        next unless defined $norm_outstream;    # skip undefined slots
        $norm_outstream->close;
    }

    # Start the list of files to consolidate with one file per compound
    # extension.
    my @compound_files;
    for my $extension (@COMPOUND_EXTENSIONS) {
        push @compound_files, "$seg_name.$extension";
    }

    # Consolidate into a compound file - but only if docs were actually added.
    if ( $self->{doc_count} ) {
        my $compound_file_writer
            = KinoSearch1::Index::CompoundFileWriter->new(
            invindex => $invindex,
            filename => "$seg_name.tmp",
            );

        # Each indexed field contributes a "$seg_name.f<field_num>" file.
        my @indexed_finfos
            = grep { $_->get_indexed } $self->{finfos}->get_infos;
        for my $finfo (@indexed_finfos) {
            push @compound_files, "$seg_name.f" . $finfo->get_field_num;
        }

        for my $file (@compound_files) {
            $compound_file_writer->add_file($file);
        }
        $compound_file_writer->finish;

        # The compound file is built under a .tmp name and renamed once
        # complete.
        $invindex->rename_file( "$seg_name.tmp", "$seg_name.cfs" );
    }

    # Delete the files that are no longer needed.
    # NOTE(review): when doc_count is false the per-field "$seg_name.f<N>"
    # names are never added to @compound_files, so they are not deleted
    # here - confirm such files are cleaned up elsewhere.
    for my $file (@compound_files) {
        $invindex->delete_file($file);
    }
    my $sort_file_name = "$seg_name" . SORTFILE_EXTENSION;
    $invindex->delete_file($sort_file_name)
        if $invindex->file_exists($sort_file_name);
}

1;

__END__

__XS__

MODULE = KinoSearch1   PACKAGE = KinoSearch1::Index::SegWriter

void
_write_remapped_norms(outstream, doc_map_ref, norms_ref)
    OutStream *outstream;
    SV        *doc_map_ref;
    SV        *norms_ref;
PPCODE: 
    /* Thin XS binding: passes all three arguments unchanged to the C
     * implementation.  Nothing is pushed onto the Perl stack here, so the
     * Perl-level call yields no return values. */
    Kino1_SegWriter_write_remapped_norms(outstream, doc_map_ref, norms_ref);

__H__

#ifndef H_KINOSEARCH_SEG_WRITER
#define H_KINOSEARCH_SEG_WRITER 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1StoreOutStream.h"
#include "KinoSearch1UtilCarp.h"

/* Write norms to an OutStream, skipping documents whose doc map entry is -1.
 * Arguments, in order: the destination OutStream, a reference to a scalar
 * holding the doc map as packed I32 values, and a reference to a scalar
 * holding one norm byte per document.
 */
void Kino1_SegWriter_write_remapped_norms(OutStream*, SV*, SV*);

#endif /* include guard */

__C__

#include "KinoSearch1IndexSegWriter.h"

/* Write one norm byte to outstream for every document whose doc map entry is
 * not -1.
 *
 * outstream   - destination stream; bytes are emitted via its write_byte.
 * doc_map_ref - reference to a scalar whose string buffer holds one I32 per
 *               document; an entry of -1 means the document's norm is
 *               skipped.
 * norms_ref   - reference to a scalar whose string buffer holds one norm
 *               byte per document, parallel to the doc map.
 *
 * Confesses if either argument is not a reference, or if the doc map does not
 * contain exactly one I32 for each norm byte.
 */
void 
Kino1_SegWriter_write_remapped_norms(OutStream *outstream, SV *doc_map_ref,
                                    SV* norms_ref) {
    SV     *norms_sv, *doc_map_sv;
    I32    *doc_map, *doc_map_end;
    char   *norms;
    STRLEN  doc_map_len, norms_len;

    /* SvRV on a non-reference is undefined, so validate before use */
    if (!SvROK(doc_map_ref) || !SvROK(norms_ref))
        Kino1_confess("doc_map and norms must be references");

    /* extract doc map and norms arrays */
    doc_map_sv = SvRV(doc_map_ref);
    doc_map    = (I32*)SvPV(doc_map_sv, doc_map_len);
    norms_sv   = SvRV(norms_ref);
    norms      = SvPV(norms_sv, norms_len);
    if (doc_map_len != norms_len * sizeof(I32))
        Kino1_confess("Mismatched doc_map and norms");

    /* The check above guarantees exactly norms_len I32 entries, so the end
     * of the doc map can be derived from the validated length. */
    doc_map_end = doc_map + norms_len;

    /* write a norm for each non-deleted doc */
    for ( ; doc_map < doc_map_end; doc_map++, norms++) {
        if (*doc_map != -1) {
            outstream->write_byte(outstream, *norms);
        }
    }
}

__POD__

==begin devdocs

==head1 NAME

KinoSearch1::Index::SegWriter - write one segment of an invindex

==head1 DESCRIPTION

SegWriter is a conduit through which information fed to InvIndexer passes on
its way to low-level writers such as FieldsWriter and TermInfosWriter.



( run in 0.493 second using v1.01-cache-2.11-cpan-5511b514fd6 )