KinoSearch

 view release on metacpan or  search on metacpan

core/KinoSearch/Search/TermQuery.c  view on Meta::CPAN

     * per-document.
     */
    self->raw_weight = self->idf * self->boost;

    // Make final preparations. 
    TermCompiler_Normalize(self);

    return self;
}

/* Test whether `other` is equivalent to this TermCompiler.
 *
 * Returns false unless the Compiler-level comparison succeeds, `other` is a
 * TERMCOMPILER, and all four float members (idf, raw_weight,
 * query_norm_factor, normalized_weight) compare exactly equal.
 */
bool_t
TermCompiler_equals(TermCompiler *self, Obj *other)
{
    TermCompiler *twin;

    // Parent-class comparison comes first, then the type check.
    if (!Compiler_equals((Compiler*)self, other)) { return false; }
    if (!Obj_Is_A(other, TERMCOMPILER))           { return false; }

    // Only look at the members once `other` is known to be a TermCompiler.
    twin = (TermCompiler*)other;
    if (   self->idf               != twin->idf
        || self->raw_weight        != twin->raw_weight
        || self->query_norm_factor != twin->query_norm_factor
        || self->normalized_weight != twin->normalized_weight
    ) {
        return false;
    }
    return true;
}

/* Write this TermCompiler to `outstream`.
 *
 * The Compiler-level data is written first via Compiler_serialize(),
 * followed by four 32-bit floats in this order: idf, raw_weight,
 * query_norm_factor, normalized_weight.  TermCompiler_deserialize() reads
 * them back in the same order.
 */
void
TermCompiler_serialize(TermCompiler *self, OutStream *outstream)
{
    float    members[4];
    unsigned tick;

    Compiler_serialize((Compiler*)self, outstream);

    // Gather the float members in their on-disk order.
    members[0] = self->idf;
    members[1] = self->raw_weight;
    members[2] = self->query_norm_factor;
    members[3] = self->normalized_weight;

    for (tick = 0; tick < sizeof(members) / sizeof(members[0]); tick++) {
        OutStream_Write_F32(outstream, members[tick]);
    }
}

/* Populate a TermCompiler from `instream`.
 *
 * If `self` is NULL, a fresh object is created with
 * VTable_Make_Obj(TERMCOMPILER); otherwise the supplied object is filled in.
 * The Compiler-level data is read first via Compiler_deserialize(), then
 * four 32-bit floats in the order written by TermCompiler_serialize().
 *
 * Returns the populated object.
 */
TermCompiler*
TermCompiler_deserialize(TermCompiler *self, InStream *instream)
{
    if (self == NULL) {
        self = (TermCompiler*)VTable_Make_Obj(TERMCOMPILER);
    }

    Compiler_deserialize((Compiler*)self, instream);

    // Read order must mirror the write order: idf, raw_weight,
    // query_norm_factor, normalized_weight.
    self->idf = InStream_Read_F32(instream);
    self->raw_weight = InStream_Read_F32(instream);
    self->query_norm_factor = InStream_Read_F32(instream);
    self->normalized_weight = InStream_Read_F32(instream);

    return self;
}

/* Return the square of this compiler's raw_weight.
 */
float
TermCompiler_sum_of_squared_weights(TermCompiler *self)
{
    const float raw = self->raw_weight;
    return raw * raw;
}

/* Record `query_norm_factor` and derive normalized_weight from it.
 *
 * The normalized weight is raw_weight scaled by the idf and norm_q factors
 * of this term:
 *
 *      ( tf_q * idf_q / norm_q )
 *
 * Note: factoring in IDF a second time is correct.  See formula.
 */
void
TermCompiler_apply_norm_factor(TermCompiler *self, float query_norm_factor)
{
    float weight;

    // Keep the multiplication order: (raw_weight * idf) * query_norm_factor.
    weight = self->raw_weight * self->idf * query_norm_factor;

    self->query_norm_factor = query_norm_factor;
    self->normalized_weight = weight;
}

/* Accessor: return the stored normalized_weight.
 */
float
TermCompiler_get_weight(TermCompiler *self)
{
    const float weight = self->normalized_weight;
    return weight;
}

/* Build a Matcher for the parent TermQuery's field/term against `reader`.
 *
 * The PostingListReader is fetched from `reader` by class name and asked for
 * the posting list of the parent query's field and term.  `need_score` is
 * passed through to PList_Make_Matcher() unchanged.
 *
 * Returns NULL when no PostingListReader is fetched, when no posting list is
 * returned, or when the posting list's doc freq is 0.  The posting list
 * reference obtained here is released before returning in every case.
 */
Matcher*
TermCompiler_make_matcher(TermCompiler *self, SegReader *reader,
                          bool_t need_score)
{
    TermQuery         *query   = (TermQuery*)self->parent;
    PostingListReader *fetched = (PostingListReader*)SegReader_Fetch(reader,
        VTable_Get_Name(POSTINGLISTREADER));
    PostingList       *postings = NULL;
    Matcher           *matcher  = NULL;

    if (fetched) {
        postings = PListReader_Posting_List(fetched, query->field,
            query->term);
    }

    // Only build a matcher when there is at least one matching document.
    if (postings != NULL && PList_Get_Doc_Freq(postings) != 0) {
        matcher = PList_Make_Matcher(postings, self->sim,
            (Compiler*)self, need_score);
    }

    DECREF(postings);
    return matcher;
}

VArray*
TermCompiler_highlight_spans(TermCompiler *self, Searcher *searcher, 
                             DocVector *doc_vec, const CharBuf *field)
{
    TermQuery *const parent = (TermQuery*)self->parent;
    VArray *spans = VA_new(0);
    TermVector *term_vector;
    I32Array *starts, *ends;
    uint32_t i, max;
    UNUSED_VAR(searcher);

    if (!CB_Equals(parent->field, (Obj*)field)) return spans;

    // Add all starts and ends. 
    term_vector = DocVec_Term_Vector(doc_vec, field, (CharBuf*)parent->term);
    if (!term_vector) return spans;

    starts = TV_Get_Start_Offsets(term_vector);
    ends   = TV_Get_End_Offsets(term_vector);
    for (i = 0, max = I32Arr_Get_Size(starts); i < max; i++) {
        int32_t start  = I32Arr_Get(starts, i);
        int32_t length = I32Arr_Get(ends, i) - start;
        VA_Push(spans, 
            (Obj*)Span_new(start, length, TermCompiler_Get_Weight(self)) );
    }



( run in 0.460 second using v1.01-cache-2.11-cpan-5511b514fd6 )