KinoSearch
view release on metacpan or search on metacpan
core/KinoSearch/Index/SegPostingList.c view on Meta::CPAN
/* Accessor for the Posting held in self->posting.  The pointer is returned
 * exactly as stored; this function does not adjust any reference count.
 */
Posting*
SegPList_get_posting(SegPostingList *self)
{
    Posting *const held = self->posting;
    return held;
}
/* Accessor for self->doc_freq, the value SegPList_next compares count
 * against to decide whether any records remain.
 */
uint32_t
SegPList_get_doc_freq(SegPostingList *self)
{
    const uint32_t freq = self->doc_freq;
    return freq;
}
/* Report the doc_id field of the Posting currently held in self->posting. */
int32_t
SegPList_get_doc_id(SegPostingList *self)
{
    const Posting *const held = self->posting;
    return held->doc_id;
}
/* Accessor for self->count, the counter SegPList_next increments once per
 * record it reads.
 */
uint32_t
SegPList_get_count(SegPostingList *self)
{
    const uint32_t consumed = self->count;
    return consumed;
}
/* Accessor for self->post_stream, the InStream that posting records are
 * read from.  The pointer is returned exactly as stored.
 */
InStream*
SegPList_get_post_stream(SegPostingList *self)
{
    InStream *const stream = self->post_stream;
    return stream;
}
/* Step to the next posting record.
 *
 * While count is below doc_freq, bumps count, reads one record from the
 * postings stream into self->posting, and returns that posting's doc_id.
 * Once count has reached doc_freq, resets the posting and returns 0.
 */
int32_t
SegPList_next(SegPostingList *self)
{
    InStream *const in      = self->post_stream;
    Posting  *const current = self->posting;

    if (self->count < self->doc_freq) {
        // Still have records: account for this one, then decode it.
        self->count += 1;
        Post_Read_Record(current, in);
        return current->doc_id;
    }

    // Exhausted: clear the posting and signal the end with 0.
    Post_Reset(current);
    return 0;
}
/* Move this posting list forward until it reaches a doc id >= target.
 *
 * Works in two phases:
 *   1. If doc_freq >= skip_interval, walk the skip records and, when they
 *      point to a later position in the postings stream, seek there and
 *      fix up posting->doc_id and count to match.
 *   2. Scan record by record with SegPList_Next.
 *
 * Returns the first value produced by SegPList_Next that is either 0 or
 * >= target.  (SegPList_next in this file returns 0 once count reaches
 * doc_freq; presumably SegPList_Next dispatches to it -- confirm.)
 */
int32_t
SegPList_advance(SegPostingList *self, int32_t target)
{
Posting *posting = self->posting;
const uint32_t skip_interval = self->skip_interval;
// Skip data is only consulted when there are at least skip_interval docs.
if (self->doc_freq >= skip_interval) {
InStream *post_stream = self->post_stream;
InStream *skip_stream = self->skip_stream;
SkipStepper *const skip_stepper = self->skip_stepper;
// Candidate jump destination.  Seeded with the stepper's current doc id
// and the stream's current position, so the seek below is a no-op unless
// the loop finds a later file position.
// NOTE(review): new_doc_id is unsigned while this function's doc ids are
// int32_t -- confirm the type of skip_stepper->doc_id.
uint32_t new_doc_id = skip_stepper->doc_id;
int64_t new_filepos = InStream_Tell(post_stream);
/* Assuming the default skip_interval of 16...
*
* Say we're currently on the 5th doc matching this term, and we get a
* request to skip to the 18th doc matching it. We won't have skipped
* yet, but we'll have already gone past 5 of the 16 skip docs --
* ergo, the modulus in the following formula.
*/
int32_t num_skipped = 0 - (self->count % skip_interval);
// When count is a nonzero exact multiple of skip_interval the modulus is
// 0, and a whole interval is subtracted instead.
if (num_skipped == 0 && self->count != 0) {
num_skipped = 0 - skip_interval;
}
// See if there's anything to skip.
while (target > skip_stepper->doc_id) {
// Remember the stepper's current entry as the jump destination.
new_doc_id = skip_stepper->doc_id;
new_filepos = skip_stepper->filepos;
// Credit one interval for each entry at or beyond the current posting.
// A doc_id of 0 is excluded -- presumably the stepper's initial state
// before any skip record has been read (confirm).
if ( skip_stepper->doc_id != 0
&& skip_stepper->doc_id >= posting->doc_id
) {
num_skipped += skip_interval;
}
// Stop once every skip record (num_skips of them) has been consumed.
if (self->skip_count >= self->num_skips)
break;
SkipStepper_Read_Record(skip_stepper, skip_stream);
self->skip_count++;
}
// If we found something to skip, skip it.
if (new_filepos > InStream_Tell(post_stream)) {
// Move the postings filepointer up.
InStream_Seek(post_stream, new_filepos);
// Jump to the new doc id.
posting->doc_id = new_doc_id;
// Increase count by the number of docs we skipped over.
self->count += num_skipped;
}
}
// Done skipping, so scan.
while (1) {
int32_t doc_id = SegPList_Next(self);
// 0 is passed straight through to the caller; otherwise stop at the
// first doc id that reaches the target.
if (doc_id == 0 || doc_id >= target)
return doc_id;
}
}
/* Position this posting list on the term `target` within self->field.
 *
 * Fetches the term's TermInfo through the LexiconReader obtained from
 * self->plist_reader, passes it to S_seek_tinfo, and then DECREFs it.
 */
void
SegPList_seek(SegPostingList *self, Obj *target)
{
    LexiconReader *const lexicon_source
        = PListReader_Get_Lex_Reader(self->plist_reader);
    TermInfo *term_info
        = LexReader_Fetch_Term_Info(lexicon_source, self->field, target);

    S_seek_tinfo(self, term_info);

    // Drop the reference to the fetched TermInfo now that seeking is done.
    DECREF(term_info);
}
void
SegPList_seek_lex(SegPostingList *self, Lexicon *lexicon)
{
// Maybe true, maybe not.
( run in 2.250 seconds using v1.01-cache-2.11-cpan-d06a3f9ecfd )