KinoSearch1

 view release on metacpan or  search on metacpan

lib/KinoSearch1/Search/PhraseScorer.pm  view on Meta::CPAN

package KinoSearch1::Search::PhraseScorer;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Search::Scorer );

BEGIN {
    __PACKAGE__->init_instance_vars(
        # constructor params
        weight         => undef,
        term_docs      => undef,
        phrase_offsets => undef,
        norms_reader   => undef,
        slop           => 0,
    );
}
our %instance_vars;

sub new {
    my $either = shift;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = ( %instance_vars, @_ );
    my $self = $either->SUPER::new;
    $self->_init_child;

    # set/derive some member vars
    $self->_set_norms( $args{norms_reader}->get_bytes );
    $self->set_similarity( $args{similarity} );
    $self->_set_weight_value( $args{weight}->get_value );
    confess("Sloppy phrase matching not yet implemented")
        unless $args{slop} == 0;    # TODO -- enable slop.
    $self->_set_slop( $args{slop} );

    # sort terms by ascending frequency
    confess("positions count doesn't match term count")
        unless $#{ $args{term_docs} } == $#{ $args{phrase_offsets} };
    my @by_size = sort { $a->[0]->get_doc_freq <=> $b->[0]->get_doc_freq }
        map { [ $args{term_docs}[$_], $args{phrase_offsets}[$_] ] }
        0 .. $#{ $args{term_docs} };
    my @term_docs      = map { $_->[0] } @by_size;
    my @phrase_offsets = map { $_->[1] } @by_size;
    $self->_init_elements( \@term_docs, \@phrase_offsets );

    return $self;
}

1;

__END__

__XS__

MODULE = KinoSearch1    PACKAGE = KinoSearch1::Search::PhraseScorer

void
_init_child(scorer)
    Scorer *scorer;
PPCODE:
    Kino1_PhraseScorer_init_child(scorer);

void
_init_elements(scorer, term_docs_av, phrase_offsets_av) 
    Scorer *scorer;
    AV     *term_docs_av;
    AV     *phrase_offsets_av;
PREINIT:
    PhraseScorerChild *child;
    I32                i;
    SV               **sv_ptr;
    IV                 tmp;
PPCODE:
{
    child = (PhraseScorerChild*)scorer->child;

    SvREFCNT_inc(term_docs_av);
    SvREFCNT_dec(child->term_docs_av);
    child->term_docs_av = term_docs_av;

    child->num_elements = av_len(term_docs_av) + 1;
    Kino1_New(0, child->term_docs, child->num_elements, TermDocs*);
    Kino1_New(0, child->phrase_offsets, child->num_elements, U32);
    
    /* create an array of TermDocs* */
    for(i = 0; i < child->num_elements; i++) {
        sv_ptr = av_fetch(term_docs_av, i, 0);
        tmp                 = SvIV((SV*)SvRV( *sv_ptr ));
        child->term_docs[i] = INT2PTR(TermDocs*, tmp);
        sv_ptr = av_fetch(phrase_offsets_av, i, 0);
        child->phrase_offsets[i] = SvIV( *sv_ptr );
    }
}

SV*
_phrase_scorer_set_or_get(scorer, ...)
    Scorer *scorer;
ALIAS:
    _set_slop = 1
    _get_slop = 2
    _set_weight_value = 3
    _get_weight_value = 4
    _set_norms        = 5
    _get_norms        = 6
CODE:
{
    PhraseScorerChild *child = (PhraseScorerChild*)scorer->child;

    KINO_START_SET_OR_GET_SWITCH

    case 1:  child->slop = SvIV( ST(1) );
             /* fall through */
    case 2:  RETVAL = newSViv(child->slop);
             break;

    case 3:  child->weight_value = SvNV( ST(1) );
             /* fall through */
    case 4:  RETVAL = newSVnv(child->weight_value);
             break;

    case 5:  SvREFCNT_dec(child->norms_sv);
             child->norms_sv = newSVsv( ST(1) );
             {
                 SV* bytes_deref_sv;
                 bytes_deref_sv = SvRV(child->norms_sv);
                 if (SvPOK(bytes_deref_sv)) {
                     child->norms = (unsigned char*)SvPVX(bytes_deref_sv);
                 }
                 else {
                     child->norms = NULL;
                 }
             }
             /* fall through */
    case 6:  RETVAL = newSVsv(child->norms_sv);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL

void
DESTROY(scorer)
    Scorer *scorer;
PPCODE:
    Kino1_PhraseScorer_destroy(scorer);

__H__

#ifndef H_KINO_PHRASE_SCORER
#define H_KINO_PHRASE_SCORER 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1IndexTermDocs.h"
#include "KinoSearch1SearchScorer.h"
#include "KinoSearch1UtilMemManager.h"

typedef struct phrasescorerchild {
    U32             doc;
    U32             slop;
    U32             num_elements;
    TermDocs      **term_docs;
    U32            *phrase_offsets;
    float           phrase_freq;
    float           weight_value;
    U32             first_time;
    unsigned char  *norms;
    SV             *anchor_set;
    float         (*calc_phrase_freq)(Scorer*);
    SV             *norms_sv;
    AV             *term_docs_av;
} PhraseScorerChild;

void  Kino1_PhraseScorer_init_child(Scorer*);
bool  Kino1_PhraseScorer_next(Scorer*);
float Kino1_PhraseScorer_calc_phrase_freq(Scorer*);
U32   Kino1_PhraseScorer_doc(Scorer*);
float Kino1_PhraseScorer_score(Scorer*);
void  Kino1_PhraseScorer_destroy(Scorer*);

#endif /* include guard */


__C__

#include "KinoSearch1SearchPhraseScorer.h"

void
Kino1_PhraseScorer_init_child(Scorer *scorer) {
    PhraseScorerChild *child;

    /* allocate */
    Kino1_New(0, child, 1, PhraseScorerChild);
    scorer->child = child;
    child->anchor_set      = newSV(0);

    /* init */
    child->doc             = 0xFFFFFFFF;
    child->slop            = 0;
    child->first_time      = 1;
    child->phrase_freq     = 0.0;
    child->norms           = NULL;
    child->phrase_offsets  = NULL;



( run in 0.803 second using v1.01-cache-2.11-cpan-5511b514fd6 )