App-CSE

 view release on metacpan or  search on metacpan

lib/App/CSE/Lucy/Search/QueryPrefix.pm  view on Meta::CPAN

package App::CSE::Lucy::Search::QueryPrefix;
$App::CSE::Lucy::Search::QueryPrefix::VERSION = '0.016';
## Copied from http://api.metacpan.org/source/CREAMYG/Lucy-0.3.3/sample/PrefixQuery.pm

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use strict;
use warnings;

use base qw( Lucy::Search::Query );
use Carp;
use Scalar::Util qw( blessed );


# Inside-out member vars and hand-rolled accessors.
my %query_string;
my %field;
my %highlight_terms;
my %keep_case;

sub get_query_string { my $self = shift; return $query_string{$$self} }
sub get_field        { my $self = shift; return $field{$$self} }
sub highlight_terms  { my $self = shift; return $highlight_terms{$$self} }
sub keep_case        { my $self = shift; return $keep_case{$$self} }



sub new {
    my ( $class, %args ) = @_;
    my $query_string = delete $args{query_string};
    my $field        = delete $args{field};
    my $keep_case    = delete $args{keep_case} || 0;

    my $self         = $class->SUPER::new(%args);


    confess("'query_string' param is required")
        unless defined $query_string;
    confess("Invalid query_string: '$query_string'")
        unless $query_string =~ /\*\s*$/;
    confess("'field' param is required")
        unless defined $field;

    $query_string{$$self} = $query_string;
    $field{$$self}        = $field;
    $highlight_terms{$$self} = [];
    $keep_case{$$self} = $keep_case;

    return $self;
  }

sub highlight_query{
  my ($self, $field) = @_;
  my $query = Lucy::Search::ORQuery->new();
  foreach my $term ( @{$self->highlight_terms()} ){
    $query->add_child(Lucy::Search::TermQuery->new(
                                                   field => $field || $self->get_field(),
                                                   term  => $term,
                                                  ));
  }
  return $query;
}


sub add_matching_term{
  my ($self, $term ) = @_;
  push @{$self->highlight_terms()}, $term;
}

sub DESTROY {
    my $self = shift;
    delete $query_string{$$self};
    delete $field{$$self};
    delete $highlight_terms{$$self};
    $self->SUPER::DESTROY;
  }

sub equals {
    my ( $self, $other ) = @_;
    return 0 unless blessed($other);
    return 0 unless $other->isa("App::CSE::Lucy::Search::QueryPrefix");
    return 0 unless $field{$$self} eq $field{$$other};
    return 0 unless $query_string{$$self} eq $query_string{$$other};
    return 1;
  }

sub to_string {
    my $self = shift;
    return "$field{$$self}:$query_string{$$self}";
  }

sub make_compiler {
  my ( $self, %args ) = @_;
  my $subordinate = delete $args{subordinate};
  my $compiler = App::CSE::Lucy::Search::PrefixCompiler->new( %args, parent => $self );
  $compiler->normalize unless $subordinate;
  return $compiler;
}

1;

package App::CSE::Lucy::Search::PrefixCompiler;
$App::CSE::Lucy::Search::PrefixCompiler::VERSION = '0.016';
use base qw( Lucy::Search::Compiler );

sub make_matcher {
  my ( $self, %args ) = @_;
  my $seg_reader = $args{reader};

  # Retrieve low-level components LexiconReader and PostingListReader.
  my $lex_reader
    = $seg_reader->obtain("Lucy::Index::LexiconReader");
  my $plist_reader
    = $seg_reader->obtain("Lucy::Index::PostingListReader");


  # Acquire a Lexicon and seek it to our query string.
  my $substring = $self->get_parent->get_query_string;
  $substring =~ s/\*\s*$//;

  ## Making that case insensitive only if the parent says so
  unless( $self->get_parent->keep_case() ){
    $substring = lc( $substring );
  }

  my $field = $self->get_parent->get_field;
  my $lexicon = $lex_reader->lexicon( field => $field );
  return unless $lexicon;


  $lexicon->seek($substring);

  # Accumulate PostingLists for each matching term.
  my @posting_lists;
  while ( defined( my $term = $lexicon->get_term ) ) {
    last unless $term =~ /^\Q$substring/;

    $self->get_parent()->add_matching_term($term);

    my $posting_list = $plist_reader->posting_list(
                                                   field => $field,
                                                   term  => $term,
                                                  );
    if ($posting_list) {
      push @posting_lists, $posting_list;
    }
    last unless $lexicon->next;
  }
  return unless @posting_lists;

  return App::CSE::Lucy::Search::PrefixMatcher->new( posting_lists => \@posting_lists );
}

package App::CSE::Lucy::Search::PrefixMatcher;
$App::CSE::Lucy::Search::PrefixMatcher::VERSION = '0.016';
use base qw( Lucy::Search::Matcher );

# Inside-out member vars.
my %doc_ids;
my %tick;

sub score{
  return 1.0 ; # Fixed score of 1.0
}

sub new {
  my ( $class, %args ) = @_;
  my $posting_lists = delete $args{posting_lists};
  my $self          = $class->SUPER::new(%args);

  # Cheesy but simple way of interleaving PostingList doc sets.
  my %all_doc_ids;
  for my $posting_list (@$posting_lists) {
    while ( my $doc_id = $posting_list->next ) {
      $all_doc_ids{$doc_id} = undef;
    }
  }
  my @doc_ids = sort { $a <=> $b } keys %all_doc_ids;
  $doc_ids{$$self} = \@doc_ids;

  $tick{$$self}  = -1;

  return $self;
}

sub DESTROY {
  my $self = shift;
  delete $doc_ids{$$self};
  delete $tick{$$self};
  $self->SUPER::DESTROY;
}

sub next {
  my $self    = shift;
  my $doc_ids = $doc_ids{$$self};
  my $tick    = ++$tick{$$self};
  return 0 if $tick >= scalar @$doc_ids;
  return $doc_ids->[$tick];
}

sub get_doc_id {
  my $self    = shift;
  my $tick    = $tick{$$self};
  my $doc_ids = $doc_ids{$$self};
  return $tick < scalar @$doc_ids ? $doc_ids->[$tick] : 0;
}

1;



( run in 0.660 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )