KinoSearch1
view release on metacpan or search on metacpan
lib/KinoSearch1/Search/PhraseScorer.pm view on Meta::CPAN
package KinoSearch1::Search::PhraseScorer;
use strict;
use warnings;
use KinoSearch1::Util::ToolSet;
use base qw( KinoSearch1::Search::Scorer );
# Register the named constructor parameters of this class together with
# their default values.  init_instance_vars() is not defined in this file;
# presumably it populates the package variable %instance_vars, and the
# BEGIN block makes that happen at compile time -- confirm against its
# definition.
BEGIN {
__PACKAGE__->init_instance_vars(
# constructor params
weight => undef,
term_docs => undef,
phrase_offsets => undef,
norms_reader => undef,
slop => 0,
);
}
# Package-level hash used by new(): it is handed to verify_args() and is
# merged with the caller's arguments to supply default values.
our %instance_vars;
# Constructor.
#
# Takes named parameters (checked against %instance_vars and defaulted from
# it), builds the parent Scorer, attaches the C-level child struct and copies
# norms, similarity, weight value and slop into it.  The term_docs and
# phrase_offsets arrays are reordered together by ascending document
# frequency before being handed to the XS layer.
#
# Dies (confess) when argument verification fails, when slop is non-zero
# (sloppy matching is not implemented), or when term_docs and phrase_offsets
# differ in length.
sub new {
    my $either = shift;
    confess kerror() unless verify_args( \%instance_vars, @_ );
    my %args = ( %instance_vars, @_ );

    my $self = $either->SUPER::new;
    $self->_init_child;

    # transfer constructor params into the scorer
    $self->_set_norms( $args{norms_reader}->get_bytes );
    $self->set_similarity( $args{similarity} );
    $self->_set_weight_value( $args{weight}->get_value );

    # TODO -- enable slop.
    if ( $args{slop} != 0 ) {
        confess("Sloppy phrase matching not yet implemented");
    }
    $self->_set_slop( $args{slop} );

    # both arrays must line up element for element
    my $docs_in    = $args{term_docs};
    my $offsets_in = $args{phrase_offsets};
    if ( $#{$docs_in} != $#{$offsets_in} ) {
        confess("positions count doesn't match term count");
    }

    # work out the ordering by ascending doc frequency once, then apply the
    # same permutation to both arrays so that pairs stay together
    my @order = sort {
        $docs_in->[$a]->get_doc_freq <=> $docs_in->[$b]->get_doc_freq
    } 0 .. $#{$docs_in};
    my @term_docs      = @{$docs_in}[@order];
    my @phrase_offsets = @{$offsets_in}[@order];

    $self->_init_elements( \@term_docs, \@phrase_offsets );

    return $self;
}
1;
__END__
__XS__
MODULE = KinoSearch1 PACKAGE = KinoSearch1::Search::PhraseScorer
void
_init_child(scorer)
    Scorer *scorer;
PPCODE:
{
    /* Delegate to the C routine that sets up the scorer's child struct. */
    Kino1_PhraseScorer_init_child(scorer);
}
void
_init_elements(scorer, term_docs_av, phrase_offsets_av)
    Scorer *scorer;
    AV     *term_docs_av;
    AV     *phrase_offsets_av;
PREINIT:
    PhraseScorerChild *child;
    I32                i;
    I32                num_elements;
    SV               **sv_ptr;
    IV                 tmp;
PPCODE:
{
    /* Store the phrase's TermDocs objects and their offsets in the child
     * struct.
     *
     * term_docs_av      - Perl array of TermDocs objects (references whose
     *                     referent holds the C pointer as an IV).
     * phrase_offsets_av - Perl array of integer offsets, parallel to
     *                     term_docs_av.
     *
     * Croaks if phrase_offsets_av is shorter than term_docs_av, or if an
     * element is missing or (for term_docs_av) is not a reference.
     *
     * NOTE(review): arrays allocated by an earlier call are not released
     * here; new() calls this exactly once per scorer.
     */
    child        = (PhraseScorerChild*)scorer->child;
    num_elements = av_len(term_docs_av) + 1;

    /* Validate every input before touching child state, so that a bad
     * argument cannot leave the scorer half-initialised.  av_fetch()
     * returns NULL for a missing element and SvRV() is only meaningful
     * on a reference. */
    if (av_len(phrase_offsets_av) + 1 < num_elements)
        croak("phrase_offsets has fewer elements than term_docs");
    for (i = 0; i < num_elements; i++) {
        sv_ptr = av_fetch(term_docs_av, i, 0);
        if (sv_ptr == NULL || !SvROK(*sv_ptr))
            croak("term_docs element %d is not an object reference", (int)i);
        sv_ptr = av_fetch(phrase_offsets_av, i, 0);
        if (sv_ptr == NULL)
            croak("phrase_offsets element %d is missing", (int)i);
    }

    /* Take a reference on the new array before dropping the old one, so
     * that passing the same array again cannot free it. */
    SvREFCNT_inc(term_docs_av);
    SvREFCNT_dec(child->term_docs_av);
    child->term_docs_av = term_docs_av;

    child->num_elements = num_elements;
    Kino1_New(0, child->term_docs, child->num_elements, TermDocs*);
    Kino1_New(0, child->phrase_offsets, child->num_elements, U32);

    /* create an array of TermDocs* and the parallel array of offsets */
    for (i = 0; i < num_elements; i++) {
        sv_ptr = av_fetch(term_docs_av, i, 0);
        tmp    = SvIV((SV*)SvRV( *sv_ptr ));
        child->term_docs[i] = INT2PTR(TermDocs*, tmp);

        sv_ptr = av_fetch(phrase_offsets_av, i, 0);
        child->phrase_offsets[i] = SvIV( *sv_ptr );
    }
}
SV*
_phrase_scorer_set_or_get(scorer, ...)
    Scorer *scorer;
ALIAS:
    _set_slop         = 1
    _get_slop         = 2
    _set_weight_value = 3
    _get_weight_value = 4
    _set_norms        = 5
    _get_norms        = 6
CODE:
{
    /* Combined accessor for slop, weight_value and norms.  Each setter
     * (odd alias) stores ST(1) and then falls through to the matching
     * getter (even alias), so setters also return the stored value.
     *
     * KINO_START_SET_OR_GET_SWITCH / KINO_END_SET_OR_GET_SWITCH are defined
     * elsewhere; presumably they open and close a switch on the alias
     * index -- confirm against their definition.
     */
    PhraseScorerChild *child = (PhraseScorerChild*)scorer->child;

    KINO_START_SET_OR_GET_SWITCH

    case 1:  child->slop = SvIV( ST(1) );
             /* fall through */
    case 2:  RETVAL = newSViv(child->slop);
             break;

    case 3:  child->weight_value = SvNV( ST(1) );
             /* fall through */
    case 4:  RETVAL = newSVnv(child->weight_value);
             break;

    case 5:  SvREFCNT_dec(child->norms_sv);
             child->norms_sv = newSVsv( ST(1) );
             /* norms points into the string buffer of the scalar that
              * norms_sv refers to.  SvRV() is only valid on a reference,
              * so anything else (e.g. undef) is treated like a referent
              * without a string value: no norms. */
             child->norms = NULL;
             if (SvROK(child->norms_sv)) {
                 SV *bytes_deref_sv = SvRV(child->norms_sv);
                 if (SvPOK(bytes_deref_sv)) {
                     child->norms = (unsigned char*)SvPVX(bytes_deref_sv);
                 }
             }
             /* fall through */
    case 6:  RETVAL = newSVsv(child->norms_sv);
             break;

    KINO_END_SET_OR_GET_SWITCH
}
OUTPUT: RETVAL
void
DESTROY(scorer)
    Scorer *scorer;
PPCODE:
{
    /* Hand teardown of the scorer over to the C-level destructor. */
    Kino1_PhraseScorer_destroy(scorer);
}
__H__
#ifndef H_KINO_PHRASE_SCORER
#define H_KINO_PHRASE_SCORER 1
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1IndexTermDocs.h"
#include "KinoSearch1SearchScorer.h"
#include "KinoSearch1UtilMemManager.h"
/* PhraseScorer-specific state, reached through the `child` pointer of a
 * Scorer. */
typedef struct phrasescorerchild {
U32 doc; /* initialised to 0xFFFFFFFF by Kino1_PhraseScorer_init_child */
U32 slop; /* set/read through the _set_slop / _get_slop XS aliases */
U32 num_elements; /* number of entries in term_docs and phrase_offsets */
TermDocs **term_docs; /* C array of TermDocs*, extracted from term_docs_av */
U32 *phrase_offsets; /* C array filled from the Perl phrase offsets array */
float phrase_freq; /* initialised to 0.0 */
float weight_value; /* set/read through _set_weight_value / _get_weight_value */
U32 first_time; /* initialised to 1; presumably a "not yet advanced" flag -- confirm */
unsigned char *norms; /* string buffer of the scalar norms_sv refers to, or NULL */
SV *anchor_set; /* created with newSV(0) at init; its use is not visible in this section */
float (*calc_phrase_freq)(Scorer*); /* function pointer; where it is assigned is not visible in this section */
SV *norms_sv; /* private copy of the reference passed to _set_norms */
AV *term_docs_av; /* Perl array of TermDocs objects; a refcount is held by _init_elements */
} PhraseScorerChild;
/* C-level entry points of the PhraseScorer implementation.
 * Kino1_PhraseScorer_init_child and Kino1_PhraseScorer_destroy are invoked
 * from the XS glue (_init_child and DESTROY respectively). */
void Kino1_PhraseScorer_init_child(Scorer*);
bool Kino1_PhraseScorer_next(Scorer*);
float Kino1_PhraseScorer_calc_phrase_freq(Scorer*);
U32 Kino1_PhraseScorer_doc(Scorer*);
float Kino1_PhraseScorer_score(Scorer*);
void Kino1_PhraseScorer_destroy(Scorer*);
#endif /* include guard */
__C__
#include "KinoSearch1SearchPhraseScorer.h"
void
Kino1_PhraseScorer_init_child(Scorer *scorer) {
PhraseScorerChild *child;
/* allocate */
Kino1_New(0, child, 1, PhraseScorerChild);
scorer->child = child;
child->anchor_set = newSV(0);
/* init */
child->doc = 0xFFFFFFFF;
child->slop = 0;
child->first_time = 1;
child->phrase_freq = 0.0;
child->norms = NULL;
child->phrase_offsets = NULL;
( run in 0.803 second using v1.01-cache-2.11-cpan-5511b514fd6 )