KinoSearch

 view release on metacpan or  search on metacpan

core/KinoSearch/Index/SegPostingList.c  view on Meta::CPAN


/* Accessor: hand back the Posting pointer stored in this posting list.
 * The pointer is returned as-is; nothing is copied here.
 */
Posting*
SegPList_get_posting(SegPostingList *self)
{
    Posting *const current = self->posting;
    return current;
}

/* Accessor: report the doc_freq value stored in this posting list.
 * SegPList_next() compares its running count against this same field to
 * decide when the list is exhausted.
 */
uint32_t
SegPList_get_doc_freq(SegPostingList *self)
{
    const uint32_t doc_freq = self->doc_freq;
    return doc_freq;
}

/* Accessor: report the doc_id currently held by this list's Posting.
 * Dereferences self->posting without a NULL check.
 */
int32_t
SegPList_get_doc_id(SegPostingList *self)
{
    Posting *const current = self->posting;
    return current->doc_id;
}

/* Accessor: report the running count field, which SegPList_next()
 * increments once for every record it reads.
 */
uint32_t
SegPList_get_count(SegPostingList *self)
{
    return self->count;
}
/* Accessor: hand back the InStream that posting records are read from.
 * The pointer is returned as-is.
 */
InStream*
SegPList_get_post_stream(SegPostingList *self)
{
    return self->post_stream;
}

/* Step to the next posting in the list.
 *
 * While fewer than doc_freq records have been consumed, bump the running
 * count, read one record from the postings stream into the Posting, and
 * return the Posting's doc_id.  Once the list is exhausted, reset the
 * Posting and return 0.
 */
int32_t
SegPList_next(SegPostingList *self)
{
    Posting *const current = self->posting;

    if (self->count < self->doc_freq) {
        InStream *const instream = self->post_stream;

        // Still have docs left: consume exactly one record.
        self->count++;
        Post_Read_Record(current, instream);
        return current->doc_id;
    }

    // Out of docs.
    Post_Reset(current);
    return 0;
}

/* Advance the posting list to the first doc whose id is >= target.
 *
 * Works in two phases:
 *
 *   1. Skip phase (only when doc_freq >= skip_interval): walk the skip
 *      records while target is greater than the skip stepper's doc_id,
 *      remembering the last doc_id / file position seen.  If that file
 *      position lies beyond the current position of the postings stream,
 *      seek there, overwrite the Posting's doc_id, and add the number of
 *      docs jumped over to self->count.
 *   2. Scan phase: call SegPList_Next() repeatedly until it yields either 0
 *      or a doc_id >= target.
 *
 * Returns whatever the final SegPList_Next() call returned: a doc_id that
 * is >= target, or 0.
 *
 * NOTE(review): SegPList_Next (capital N) is not defined in this view;
 * presumably it is the dispatch wrapper for SegPList_next, in which case 0
 * means the list is exhausted -- confirm.
 */
int32_t
SegPList_advance(SegPostingList *self, int32_t target)
{
    Posting *posting          = self->posting;
    const uint32_t skip_interval = self->skip_interval;

    // Skip data is only consulted when doc_freq reaches the skip interval.
    if (self->doc_freq >= skip_interval) {
        InStream *post_stream           = self->post_stream;
        InStream *skip_stream           = self->skip_stream;
        SkipStepper *const skip_stepper = self->skip_stepper;
        // Candidate jump destination; starts out as "stay where we are".
        uint32_t new_doc_id             = skip_stepper->doc_id;
        int64_t new_filepos             = InStream_Tell(post_stream);

        /* Assuming the default skip_interval of 16...
         *
         * Say we're currently on the 5th doc matching this term, and we get a
         * request to skip to the 18th doc matching it.  We won't have skipped
         * yet, but we'll have already gone past 5 of the 16 skip docs --
         * ergo, the modulus in the following formula.
         *
         * The subtraction happens in unsigned arithmetic (skip_interval is
         * uint32_t); the wrapped result becomes a negative value once it is
         * converted to int32_t.
         */
        int32_t num_skipped = 0 - (self->count % skip_interval);
        // Sitting exactly on an interval boundary (and not at the very
        // start): start from a full interval's worth of negative offset.
        if (num_skipped == 0 && self->count != 0) { 
            num_skipped = 0 - skip_interval; 
        }

        // See if there's anything to skip.
        while (target > skip_stepper->doc_id) {
            // Remember the current skip record as the jump destination.
            new_doc_id    = skip_stepper->doc_id;
            new_filepos   = skip_stepper->filepos;

            // Only count an interval as skipped when the skip record is
            // non-zero and not behind the Posting's current doc_id.
            if (   skip_stepper->doc_id != 0 
                && skip_stepper->doc_id >= posting->doc_id
            ) {
                num_skipped += skip_interval;
            }

            // No more skip records to read.
            if (self->skip_count >= self->num_skips)
                break;

            SkipStepper_Read_Record(skip_stepper, skip_stream);
            self->skip_count++;
        }

        // If we found something to skip, skip it.
        if (new_filepos > InStream_Tell(post_stream)) {

            // Move the postings filepointer up.
            InStream_Seek(post_stream, new_filepos);

            // Jump to the new doc id.
            posting->doc_id = new_doc_id;

            // Increase count by the number of docs we skipped over.
            self->count += num_skipped;
        }
    }

    // Done skipping, so scan.
    while (1) {
        int32_t doc_id = SegPList_Next(self);
        // Stop on 0 or on the first doc at/after target.
        if (doc_id == 0 || doc_id >= target)
            return doc_id; 
    }
}

/* Position the posting list on the term given by `target`.
 *
 * Fetches the TermInfo for (self->field, target) from the LexiconReader
 * obtained via self->plist_reader, passes it to S_seek_tinfo(), and then
 * releases this function's reference to the TermInfo with DECREF.
 *
 * NOTE(review): the fetched TermInfo is passed on without a NULL check;
 * presumably S_seek_tinfo() and DECREF tolerate NULL -- confirm.
 */
void
SegPList_seek(SegPostingList *self, Obj *target)
{
    LexiconReader *reader = PListReader_Get_Lex_Reader(self->plist_reader);
    TermInfo *term_info
        = LexReader_Fetch_Term_Info(reader, self->field, target);

    S_seek_tinfo(self, term_info);

    // Drop the reference obtained from the fetch above.
    DECREF(term_info);
}

void
SegPList_seek_lex(SegPostingList *self, Lexicon *lexicon)
{
    // Maybe true, maybe not. 



( run in 2.250 seconds using v1.01-cache-2.11-cpan-d06a3f9ecfd )