KinoSearch1

 view release on metacpan or  search on metacpan

lib/KinoSearch1/Document/Field.pm  view on Meta::CPAN

    $self->{fnm_bits} = KinoSearch1::Index::FieldInfos->encode_fnm_bits($self)
        unless defined $self->{fnm_bits};
    return $self->{fnm_bits};
}

# Setter: record an already-computed fdt_bits value on the field, so that
# get_fdt_bits() has no need to derive it.  Returns the value assigned.
sub set_fdt_bits {
    my ( $self, $fdt_bits ) = @_;
    return $self->{fdt_bits} = $fdt_bits;
}

# Getter for fdt_bits.  The value is computed on first request by asking
# KinoSearch1::Index::FieldsReader to encode it from this field, then cached
# in the object.  A defined-but-false value (e.g. 0) counts as already set.
sub get_fdt_bits {
    my ($self) = @_;

    if ( !defined $self->{fdt_bits} ) {
        my $encoded
            = KinoSearch1::Index::FieldsReader->encode_fdt_bits($self);
        $self->{fdt_bits} = $encoded;
    }

    return $self->{fdt_bits};
}

# Length of the field's value measured in bytes rather than characters.
sub get_value_len {
    my $self = shift;
    return bytes::length( $self->{value} );
}

# Look up one term in this field's term vector data.  Returns a
# KinoSearch1::Index::TermVector describing the term's positions and
# start/end offsets, or nothing (empty list / undef) when the field has no
# term vector string or the term does not occur in it.
sub term_vector {
    my $self      = shift;
    my $term_text = shift;

    # Without any stored term vector data there is nothing to search.
    return unless bytes::length( $self->{tv_string} );

    # Decode the term vector string only once; later calls reuse the hash.
    $self->{tv_cache} = _extract_tv_cache( $self->{tv_string} )
        unless defined $self->{tv_cache};

    my $tv_cache = $self->{tv_cache};
    return unless exists $tv_cache->{$term_text};

    # The cached value is still encoded; expand it into three array refs.
    my ( $positions, $start_offsets, $end_offsets )
        = _unpack_posdata( $tv_cache->{$term_text} );

    my $tv = KinoSearch1::Index::TermVector->new(
        text          => $term_text,
        field         => $self->{name},
        positions     => $positions,
        start_offsets => $start_offsets,
        end_offsets   => $end_offsets,
    );
    return $tv;
}

1;

__END__

__XS__

MODULE = KinoSearch1    PACKAGE = KinoSearch1::Document::Field

=for comment

Decode a term vector string.  Returns a reference to a hash in which each key
is a term text and each value is that term's positional data, still in its
encoded form.

=cut

void
_extract_tv_cache(tv_string_sv)
    SV *tv_string_sv;
PREINIT:
    HV *cache_hv;
    SV *cache_ref;
PPCODE:
    cache_hv = Kino1_Field_extract_tv_cache(tv_string_sv);
    /* newRV_noinc leaves the hash's refcount alone, so the reference becomes
     * its owner; mortalizing the reference hands cleanup over to Perl. */
    cache_ref = sv_2mortal( newRV_noinc( (SV*)cache_hv ) );
    XPUSHs(cache_ref);
    XSRETURN(1);

=for comment

Decompress positional data.  Returns three array references, in this order:
positions, start offsets, end offsets.

=cut

void
_unpack_posdata(posdata_sv)
    SV *posdata_sv;
PREINIT:
    AV *positions_av;
    AV *starts_av;
    AV *ends_av;
PPCODE:
    /* Fresh arrays for the C routine to work with. */
    positions_av = newAV();
    starts_av    = newAV();
    ends_av      = newAV();
    Kino1_Field_unpack_posdata(posdata_sv, positions_av, starts_av, ends_av);

    /* Reserve room for all three return values up front, then push mortal
     * references which take over ownership of the arrays (noinc). */
    EXTEND(SP, 3);
    PUSHs( sv_2mortal( newRV_noinc( (SV*)positions_av ) ) );
    PUSHs( sv_2mortal( newRV_noinc( (SV*)starts_av ) ) );
    PUSHs( sv_2mortal( newRV_noinc( (SV*)ends_av ) ) );
    XSRETURN(3);


__H__

/* Declarations for the C helpers behind KinoSearch1::Document::Field's
 * term vector support.
 */
#ifndef H_KINOSEARCH_FIELD
#define H_KINOSEARCH_FIELD 1

#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "KinoSearch1StoreInStream.h"
#include "KinoSearch1UtilCarp.h"

/* Decode a term vector string (the SV argument) into a newly allocated hash
 * whose keys are term texts and whose values are encoded positional data.
 */
HV*  Kino1_Field_extract_tv_cache(SV*);

/* Decompress the positional data held in the SV argument.  The XS wrapper
 * passes three empty arrays, in the order positions, start offsets, end
 * offsets, and returns references to them afterwards -- presumably this
 * routine fills them in; confirm against the implementation.
 */
void Kino1_Field_unpack_posdata(SV*, AV*, AV*, AV*);

#endif /* include guard */

__C__

#include "KinoSearch1DocumentField.h"

HV* 
Kino1_Field_extract_tv_cache(SV *tv_string_sv) {
    HV *tv_cache_hv;
    char    *tv_string, *bookmark_ptr, *key;
    char   **tv_ptr;
    STRLEN   len, tv_len, overlap, key_len;
    SV      *text_sv, *nums_sv;
    I32      i, num_terms, num_positions;

    /* allocate a new hash */
    tv_cache_hv = newHV();
    
    /* extract pointers */
    tv_string = SvPV(tv_string_sv, tv_len);
    tv_ptr    = &tv_string;

    /* create a base text scalar */
    text_sv = newSV(1);
    SvPOK_on(text_sv);
    *(SvEND(text_sv)) = '\0';

    /* read the number of vectorized terms in the field */
    num_terms = Kino1_InStream_decode_vint(tv_ptr);
    for (i = 0; i < num_terms; i++) {

        /* decompress the term text */
        overlap = Kino1_InStream_decode_vint(tv_ptr);
        SvCUR_set(text_sv, overlap);
        len = Kino1_InStream_decode_vint(tv_ptr);
        sv_catpvn(text_sv, *tv_ptr, len);



( run in 0.438 second using v1.01-cache-2.11-cpan-5511b514fd6 )