XAO-Indexer

 view release on metacpan or  search on metacpan

lib/XAO/DO/Web/Indexer.pm  view on Meta::CPAN


##
# Module version, derived from the CVS/RCS Id keyword string below: the
# "major.minor" revision is captured and reformatted as major.NNN (minor
# zero-padded to three digits), then forced to a number. If the result
# is zero/false (e.g. no revision could be matched) loading dies.
#
use vars qw($VERSION);
$VERSION=(0+sprintf('%u.%03u',(q$Id: Indexer.pm,v 1.4 2008/07/05 07:02:46 am Exp $ =~ /\s(\d+)\.(\d+)\s/))) || die "Bad VERSION";

###############################################################################

##
# Mode dispatcher. The 'mode' argument is looked up (treated as
# 'path-map' when empty); a mode of 'search' is handled by this class's
# search() method, anything else is delegated to the parent class's
# check_mode(). The arguments hash is passed through unchanged.
#
sub check_mode ($%) {
    my $self=shift;
    my $args=get_args(\@_);

    my $requested=$args->{mode} || 'path-map';

    return $self->search($args) if $requested eq 'search';

    return $self->SUPER::check_mode($args);
}

###############################################################################

sub search ($%) {
    my $self=shift;
    my $args=get_args(\@_);

    my $index_id=$args->{'index_id'} ||
        throw $self "search - no 'index_id' given";

    my $orderby=$args->{'orderby'} ||
        throw $self "search - no 'orderby' given";

    my $keywords;
    if($args->{'keywords.param'}) {
        $keywords=lc($self->cgi->param($args->{'keywords.param'}));
    }
    else {
        $keywords=lc($self->decode_charset($args->{'keywords'})) ||
            throw $self "search - no 'keywords' given";
    }
    $keywords=~s/^\s*(.*?)\s*$/$1/s;

    ### dprint ">>>$index_id >> $orderby >> '$keywords'";

    ##
    # Limit is not supported by the indexed search internally, we
    # enforce it manually below.
    # TODO: Support internally
    #
    my $limit=$args->{'limit'};

    ##
    # Searching. If we have ignored words templates then building the
    # list of ignored words as well.
    #
    my $index_obj=$self->odb->fetch('/Indexes')->get($index_id);
    my $obj_ids;
    my $page=$self->object;
    my $ignored_text='';
    my $spelling_text='';
    if($args->{'ignored.path'} || $args->{'ignored.template'} || $args->{'spelling.path'} || $args->{'spelling.template'}) {
        my %rcdata;
        $obj_ids=$index_obj->search_by_string($orderby,$keywords,\%rcdata);

        if($args->{'ignored.path'} || $args->{'ignored.template'}) {
            my $iw=$rcdata{'ignored_words'};
            my $iw_num=scalar keys %$iw;
            if($iw_num) {
                $ignored_text.=$page->expand($args,{
                    path        => $args->{'ignored.header.path'},
                    template    => $args->{'ignored.header.template'},
                    TOTAL_WORDS => $iw_num,
                }) if $args->{'ignored.header.path'} || $args->{'ignored.header.template'};

                my $first=1;
                foreach my $w (keys %$iw) {
                    if($first) {
                        undef $first;
                    }
                    elsif($args->{'ignored.separator.path'} || $args->{'ignored.separator.template'}) {
                        $ignored_text.=$page->expand($args,{
                            path        => $args->{'ignored.separator.path'},
                            template    => $args->{'ignored.separator.template'},
                        });
                    }
                    $ignored_text.=$page->expand($args,{
                        path        => $args->{'ignored.path'},
                        template    => $args->{'ignored.template'},
                        WORD        => $w,
                        COUNT       => $iw->{$w},
                        TOTAL_WORDS => $iw_num,
                    });
                }

                $ignored_text.=$page->expand($args,{
                    path        => $args->{'ignored.footer.path'},
                    template    => $args->{'ignored.footer.template'},
                    TOTAL_WORDS => $iw_num,
                }) if $args->{'ignored.footer.path'} || $args->{'ignored.footer.template'};
            }
        }

        my $trigger=$args->{'spelling.trigger'} || 3;
        if(@$obj_ids<$trigger && $args->{'spelling.path'} || $args->{'spelling.template'}) {
            $index_obj->suggest_alternative($orderby,$keywords,\%rcdata);

            my @alt_kw;
            my @alt_kw_html;
            foreach my $i (0,1) {
                my $spdata=$rcdata{'spellchecker_alternatives'}->[$i];
                last unless $spdata &&
                            $spdata->{'query'} &&
                            $spdata->{'distance'}<=3;

                my $alt_query=$spdata->{'query'};
                my $alt_query_html=t2ht($alt_query);
                foreach my $pair (@{$spdata->{'pairs'}}) {
                    my $altword=t2ht($pair->[1]);
                    next unless length($altword);
                    $alt_query_html=~s/\b($altword)\b/<em><strong>$1<\/em><\/strong>/sg;
                }

                $alt_kw[$i]=$alt_query;
                $alt_kw_html[$i]=$alt_query_html;
            }
            @alt_kw && dprint "Got alternative keyword '",$alt_kw[0],"' and '",$alt_kw[1],"' for '$keywords'";

            $spelling_text=$page->expand($args,{
                'path'                  => $args->{'spelling.path'},
                'template'              => $args->{'spelling.template'},
                'ALT_KEYWORDS_1'        => $alt_kw[0] || '',
                'ALT_KEYWORDS_1.HTML'   => $alt_kw_html[0] || '',
                'ALT_KEYWORDS_2'        => $alt_kw[1] || '',
                'ALT_KEYWORDS_2.HTML'   => $alt_kw_html[1] || '',
            });
        }
    }
    else {
        $obj_ids=$index_obj->search_by_string($orderby,$keywords);
    }
    dprint "Got ".scalar(@$obj_ids)." results searching $index_id for '$keywords', ordering by $orderby";

    ##
    # Removing some IDs if required. Unfortunately, we have to translate
    # IDs into Coll.IDs first.
    #
    my $obj_coll=$index_obj->get_collection_object;
    if($args->{'exclude.field'} && defined($args->{'exclude.value'})) {
        my $sr=$obj_coll->search($args->{'exclude.field'},'eq',$args->{'exclude.value'});
        if(@$sr) {
            my %e;
            foreach my $coll_id (@$sr) {
                $e{$obj_coll->get($coll_id)->collection_key}=1;
            }
            if($limit && scalar(@$obj_ids)>$limit) {
                splice(@$obj_ids,$limit+1);
            }
            my @new_ids;
            foreach my $coll_id (@$obj_ids) {
                next if $e{$coll_id};
                push(@new_ids,$coll_id);
            }
            dprint ".after exclusion ".scalar(@new_ids);
            $obj_ids=\@new_ids;
        }
    }

    ##
    # Dropping extra elements if there is a 'limit'
    #



( run in 0.848 second using v1.01-cache-2.11-cpan-f56aa216473 )