view release on metacpan or search on metacpan
lib/XAO/DO/Data/Index.pm view on Meta::CPAN
if(keys %{$sd{ignored_words}}) {
print "Ignored words:\n";
foreach my $word (sort keys %{$sd{ignored_words}}) {
print " * $word ($sd{ignored_words}->{$word})\n";
}
}
=cut
sub search_by_string ($$$;$) {
    # Delegates a string search to the indexer object of this index.
    #
    #  $ordering - name of the ordering, passed on as 'ordering'
    #  $str      - search string, passed on as 'search_string'
    #  $rcdata   - optional, passed on untouched as 'rcdata'
    #
    # The index object itself goes along as 'index_object'. Whatever
    # the indexer's search() produces is returned as is.
    #
    my $self=shift;
    my ($ordering,$str,$rcdata)=@_;

    my @query=(
        index_object    => $self,
        search_string   => $str,
        ordering        => $ordering,
        rcdata          => $rcdata,
    );

    return $self->indexer->search(@query);
}
###############################################################################
=item search_by_string_oid ($$;$)
The same as search_by_string() method, but translates results from
collection IDs to object IDs. Use it with care: on large result sets it
may take significant time!
=cut
sub search_by_string_oid ($$$;$) {
my ($self,$ordering,$str,$rcdata)=@_;
my $cids=$self->indexer->search(
index_object => $self,
search_string => $str,
ordering => $ordering,
rcdata => $rcdata,
);
my $coll_obj=$self->get_collection_object;
my @oids;
foreach my $cid (@$cids) {
try {
push(@oids,$coll_obj->get($cid)->container_key);
}
otherwise {
my $e=shift;
lib/XAO/DO/Data/Index.pm view on Meta::CPAN
=item suggest_alternative ($$$)
Returns an alternative search string by trying words found during
search_by_string and stored in the returned data array.
EXPERIMENTAL UNSTABLE API.
=cut
sub suggest_alternative ($$$$;$) {
    # Delegates to the suggest_alternative() method of this index's
    # indexer object.
    #
    #  $ordering     - passed on as 'ordering'
    #  $str          - original search string, passed on as 'search_string'
    #  $rcdata       - passed on as 'rcdata'
    #  $need_results - optional, passed on as 'need_results'
    #
    # The index object itself goes along as 'index_object'. The
    # indexer's return value is handed back unchanged.
    #
    my $self=shift;
    my ($ordering,$str,$rcdata,$need_results)=@_;

    my @request=(
        index_object    => $self,
        search_string   => $str,
        ordering        => $ordering,
        rcdata          => $rcdata,
        need_results    => $need_results,
    );

    return $self->indexer->suggest_alternative(@request);
}
###############################################################################
=item update ($)
Updates the index with the current data. Exactly what data it is based
on depends entirely on the corresponding indexer object.
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
}
my $ordering=$args->{'ordering'} ||
throw $self "search - no 'ordering'";
my $ordering_seq=$self->get_orderings->{$ordering}->{'seq'} ||
throw $self "search - no sequence in '$ordering' ordering";
##
# Optional hash reference to be filled with statistics
#
my $rcdata=$args->{'rcdata'};
$rcdata->{'ignored_words'}={ } if $rcdata;
### dprint "Searching for '$str' (ordering=$ordering, seq=$ordering_seq)";
##
# Preparing spellchecker if needed
#
my $use_spellchecker=$rcdata && $self->config_param('use_spellchecker');
my $spellchecker;
if($use_spellchecker) {
$spellchecker=$self->get_spellchecker;
$spellchecker->switch_index($index_object->container_key);
$rcdata->{'spellchecker_words'}={ };
}
##
# We cache ignored words. Cache returns a hash reference with
# ignored words.
#
my $i_cache=get_current_project()->cache(
name => 'indexer_ignored',
coords => [ 'index_id' ],
expire => 60,
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
my @simple;
foreach my $elt (@mdata) {
my $s=$self->analyze_text_split(0,$elt);
next unless @$s;
if(@$s==1) {
push(@simple,$s->[0]);
}
else {
if($spellchecker) {
my $pairs=$spellchecker->suggest_replacements(join(' ',@$s));
@{$rcdata->{'spellchecker_words'}}{keys %$pairs}=values %$pairs;
}
my @t=map {
if(exists $ignored->{$_}) {
if($rcdata) {
$rcdata->{'ignored_words'}->{$_}=$ignored->{$_};
}
undef;
}
else {
$_;
}
} @$s;
shift(@t) while @t && !defined($t[0]);
pop(@t) while @t && !defined($t[$#t]);
if(@t==1) {
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
}
}
}
undef @mdata;
##
# Simple words
#
if($spellchecker) {
my $pairs=$spellchecker->suggest_replacements($str);
@{$rcdata->{'spellchecker_words'}}{keys %$pairs}=values %$pairs;
}
push(@simple,map {
if(exists $ignored->{$_}) {
if($rcdata) {
$rcdata->{'ignored_words'}->{$_}=$ignored->{$_};
}
();
}
else {
$_;
}
} @{$self->analyze_text_split(0,$str)});
##
# If we are asked to provide data, storing splitted words.
#
if($rcdata) {
$rcdata->{'words_single'}=\@simple;
$rcdata->{'words_multi'}=\@multi;
$rcdata->{'results_count'}=0;
}
### dprint Dumper(\@multi),Dumper(\@simple);
##
# First we search for multi-words sequences in the assumption that
# they will provide smaller result sets or no results at all.
#
my @results;
my $data_list=$index_object->get('Data');
foreach my $marr (sort { scalar(@$b) <=> scalar(@$a) } @multi) {
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
### dprint "Simple Results: '$kw' ",Dumper($res);
if(!@$res) {
return [ ];
}
push(@results,$res);
}
##
# Joining all results together
#
if($rcdata) {
my $sr=XAO::IndexerSupport::sorted_intersection(@results);
$rcdata->{'results_count'}=scalar(@$sr);
return $sr;
}
else {
return XAO::IndexerSupport::sorted_intersection(@results);
}
}
###############################################################################
sub search_multi ($$$$) {
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
###############################################################################
sub suggest_alternative ($%) {
my $self=shift;
my $args=get_args(\@_);
my $index_object=$args->{'index_object'} ||
throw $self "search - no 'index_object'";
my $rcdata=$args->{'rcdata'} ||
throw $self "suggest_alternatives - need rcdata";
my $query=$args->{'search_string'} ||
throw $self "suggest_alternatives - need search_string";
my $spwords=$rcdata->{'spellchecker_words'};
return '' unless $spwords;
##
# This can be used to improve results when using a generic
# dictionary on a small dataset, not a dictionary based on the
# actual index content (which guarantees that we already get only
# valid substitutes).
#
my %wcounts;
my $max_alt_words=$self->config_param('spellchecker/max_alt_words') || 0;
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
### delete $words{$word};
### }
### }
### }
else {
throw $self "suggest_alternative - '$algorithm' algorithm is not supported";
}
# Now building other alternative strings and returning them in order.
#
my $results_count=$rcdata->{'results_count'} || 0;
my @alts;
for(my $i=0; $i<@jobs; ++$i) {
my $distance=$jobs[$i]->{'distance'} || 1;
$distance<=$max_result_distance || next;
my $newq=$query;
my $pairs=$jobs[$i]->{'pairs'};
my @finalpairs;
for(my $j=0; $j<@$pairs; $j+=2) {
my $word=$pairs->[$j];
lib/XAO/DO/Indexer/Base.pm view on Meta::CPAN
count => $newcount,
distance => $distance,
});
last if scalar(@alts)>=$max_alt_results;
}
}
# Storing all variants and returning the first one.
#
$rcdata->{'spellchecker_alternatives'}=\@alts;
return @alts ? $alts[0]->{'query'} : undef;
}
###############################################################################
sub update ($%) {
my $self=shift;
my $args=get_args(\@_);
my $index_object=$args->{'index_object'} ||
lib/XAO/DO/Web/Indexer.pm view on Meta::CPAN
##
# Searching. If we have ignored words templates then building the
# list of ignored words as well.
#
my $index_obj=$self->odb->fetch('/Indexes')->get($index_id);
my $obj_ids;
my $page=$self->object;
my $ignored_text='';
my $spelling_text='';
if($args->{'ignored.path'} || $args->{'ignored.template'} || $args->{'spelling.path'} || $args->{'spelling.template'}) {
my %rcdata;
$obj_ids=$index_obj->search_by_string($orderby,$keywords,\%rcdata);
if($args->{'ignored.path'} || $args->{'ignored.template'}) {
my $iw=$rcdata{'ignored_words'};
my $iw_num=scalar keys %$iw;
if($iw_num) {
$ignored_text.=$page->expand($args,{
path => $args->{'ignored.header.path'},
template => $args->{'ignored.header.template'},
TOTAL_WORDS => $iw_num,
}) if $args->{'ignored.header.path'} || $args->{'ignored.header.template'};
my $first=1;
foreach my $w (keys %$iw) {
lib/XAO/DO/Web/Indexer.pm view on Meta::CPAN
$ignored_text.=$page->expand($args,{
path => $args->{'ignored.footer.path'},
template => $args->{'ignored.footer.template'},
TOTAL_WORDS => $iw_num,
}) if $args->{'ignored.footer.path'} || $args->{'ignored.footer.template'};
}
}
my $trigger=$args->{'spelling.trigger'} || 3;
if(@$obj_ids<$trigger && $args->{'spelling.path'} || $args->{'spelling.template'}) {
$index_obj->suggest_alternative($orderby,$keywords,\%rcdata);
my @alt_kw;
my @alt_kw_html;
foreach my $i (0,1) {
my $spdata=$rcdata{'spellchecker_alternatives'}->[$i];
last unless $spdata &&
$spdata->{'query'} &&
$spdata->{'distance'}<=3;
my $alt_query=$spdata->{'query'};
my $alt_query_html=t2ht($alt_query);
foreach my $pair (@{$spdata->{'pairs'}}) {
my $altword=t2ht($pair->[1]);
next unless length($altword);
$alt_query_html=~s/\b($altword)\b/<em><strong>$1<\/em><\/strong>/sg;
scripts/xao-indexer view on Meta::CPAN
##
# Searching
#
if($search) {
    # The first two remaining command line arguments are the index name
    # and the ordering name; everything after them is a search string.
    #
    my $i_name=shift @ARGV;
    my $ordering=shift @ARGV;
    my $index=$index_list->get($i_name);

    for my $query (@ARGV) {
        dprint "Searching '$i_name' for '$query'";

        # Handed to the search (and optionally to suggest_alternative)
        # by reference, then dumped for inspection.
        #
        my %rcdata;
        my $found=$index->search_by_string($ordering,$query,\%rcdata);

        $index->suggest_alternative($ordering,$query,\%rcdata) if $alternatives;

        dprint Dumper(\%rcdata);

        # Result list is printed one entry per line unless suppressed.
        #
        next if $no_results;
        print join("\n",@$found),"\n";
    }

    exit 0;
}
##
# Creating new index structure
t/testcases/Indexer/incremental.pm view on Meta::CPAN
of => 1,
},
},
);
foreach my $test_id (keys %matrix) {
my $test=$matrix{$test_id};
my $query=$test->{query};
foreach my $oname (sort keys %$test) {
next if $oname eq 'query';
next if $oname eq 'ignored';
my %rcdata;
my $sr;
if($test->{ignored}) {
$sr=$foo_index->search_by_string($oname,$query,\%rcdata);
foreach my $w (keys %{$test->{ignored}}) {
my $expect=$test->{ignored}->{$w};
my $got=$rcdata{ignored_words}->{$w};
if($expect) {
$self->assert(defined($got),
"Expected '$w' to be ignored, but it is not");
}
else {
$self->assert(!defined($got),
"Expected '$w' not to be ignored, but it is (count=".($got||'').")");
}
}
}
t/testcases/Indexer/search.pm view on Meta::CPAN
},
);
foreach my $test_id (keys %matrix) {
my $test=$matrix{$test_id};
my $query=$test->{'query'};
foreach my $oname (sort keys %$test) {
next if $oname eq 'query';
next if $oname eq 'ignored';
next if $oname eq 'use_oid';
my %rcdata;
my $sr;
if($test->{'ignored'}) {
$sr=$test->{'use_oid'} ? $foo_index->search_by_string_oid($oname,$query,\%rcdata)
: $foo_index->search_by_string($oname,$query,\%rcdata);
foreach my $w (keys %{$test->{'ignored'}}) {
my $expect=$test->{'ignored'}->{$w};
my $got=$rcdata{'ignored_words'}->{$w};
if(defined $expect) {
$self->assert(defined($got),
"Expected '$w' to be ignored, but it is not");
$self->assert($got == $expect,
"Expected count $expect on ignored $w, got $got");
}
else {
$self->assert(!defined($got),
"Expected '$w' not to be ignored, but it is (count=".($got||'').")");
}
t/testcases/Indexer/spellchecker.pm view on Meta::CPAN
);
foreach my $test_id (keys %matrix) {
my $test=$matrix{$test_id};
my $query=$test->{'query'};
foreach my $oname (sort keys %$test) {
next if $oname eq 'query';
next if $oname eq 'ignored';
next if $oname eq 'use_oid';
next if $oname eq 'speller';
next if $oname eq 'speller_query';
my %rcdata;
my $sr;
if($test->{'ignored'} || $test->{'speller'} || $test->{'speller_query'}) {
$sr=$test->{'use_oid'} ? $foo_index->search_by_string_oid($oname,$query,\%rcdata)
: $foo_index->search_by_string($oname,$query,\%rcdata);
if($test->{'speller'}) {
my $got=$rcdata{'spellchecker_words'};
$self->assert(defined($got) && ref($got) eq 'HASH',
"Expected a spellchecked list");
foreach my $word (keys %{$test->{'speller'}}) {
$self->assert(exists($got->{$word}),
"Expected a spell-suggestion for $word");
my $expect=$test->{'speller'}->{$word};
$self->assert(scalar(grep { $_ eq $expect } @{$got->{$word}}),
"Expected spell-suggestion '".$test->{'speller'}->{$word}."' for '$word'");
}
}
if($test->{'speller_query'}) {
my $got=$foo_index->suggest_alternative($oname,$query,\%rcdata);
$self->assert($got eq $test->{'speller_query'},
"Expected alternative query '$test->{'speller_query'}', got '$got'");
if($test->{'speller'}) {
foreach my $word (keys %{$test->{'speller'}}) {
my $altword=$test->{'speller'}->{$word};
next if $altword eq $word;
my $pairs=$rcdata{'spellchecker_alternatives'}->[0]->{'pairs'};
$self->assert(scalar(grep { $_->[0] eq $word && $_->[1] eq $altword } @$pairs),
"Expected to have a replacement pair ($word->$altword) for $query");
}
}
}
if($test->{'ignored'}) {
foreach my $w (keys %{$test->{'ignored'}}) {
my $expect=$test->{'ignored'}->{$w};
my $got=$rcdata{'ignored_words'}->{$w};
if(defined $expect) {
$self->assert(defined($got),
"Expected '$w' to be ignored, but it is not");
$self->assert($got == $expect,
"Expected count $expect on ignored $w, got $got");
}
else {
$self->assert(!defined($got),
"Expected '$w' not to be ignored, but it is (count=".($got||'').")");
}