Algorithm-VSM

 view release on metacpan or  search on metacpan

lib/Algorithm/VSM.pm  view on Meta::CPAN

    print "\n\nMAP value: $self->{_map}\n\n";
}

sub display_precision_vs_recall_for_queries {
    my $self = shift;
    die "You must first invoke precision_and_recall_calculator function" 
        unless scalar(keys %{$self->{_precision_for_queries}});
    print "\n\nDisplaying precision and recall values for different queries:\n\n";
    foreach my $query (sort 
                         {get_integer_suffix($a) <=> get_integer_suffix($b)} 
                         keys %{$self->{_avg_precision_for_queries}}) {
        print "\n\nQuery $query:\n";
        print "\n   (The first value is for rank 1, the second value at rank 2, and so on.)\n\n";
        my @precision_vals = @{$self->{_precision_for_queries}->{$query}};
        @precision_vals = map {sprintf "%.3f", $_} @precision_vals;
        print "   Precision at rank  =>  @precision_vals\n";
        my @recall_vals = @{$self->{_recall_for_queries}->{$query}};
        @recall_vals = map {sprintf "%.3f", $_} @recall_vals;
        print "\n   Recall at rank   =>  @recall_vals\n";
    }
    print "\n\n";
}

sub get_query_sorted_average_precision_for_queries {
    my $self = shift;
    die "You must first invoke precision_and_recall_calculator function" 
        unless scalar(keys %{$self->{_avg_precision_for_queries}});
    my @average_precisions_for_queries = ();
    foreach my $query (sort 
                         {get_integer_suffix($a) <=> get_integer_suffix($b)} 
                         keys %{$self->{_avg_precision_for_queries}}) {
        my $output = sprintf "%.3f", $self->{_avg_precision_for_queries}->{$query};
        push @average_precisions_for_queries, $output;
    }
    return \@average_precisions_for_queries;
}

###################################  Utility Routines  ###################################

sub _check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw / corpus_directory
                            corpus_vocab_db
                            doc_vectors_db
                            normalized_doc_vecs_db
                            use_idf_filter
                            stop_words_file
                            file_types
                            case_sensitive
                            max_number_retrievals
                            query_file
                            relevancy_file
                            min_word_length
                            want_stemming
                            lsa_svd_threshold
                            relevancy_threshold
                            break_camelcased_and_underscored
                            save_model_on_disk
                            debug
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

# checks whether an element is in an array:
sub contained_in {
    my $ele = shift;
    my @array = @_;
    my $count = 0;
    map {$count++ if $ele eq $_} @array;
    return $count;
}

# Meant only for an un-nested hash:
sub deep_copy_hash {
    my $ref_in = shift;
    my $ref_out = {};
    foreach ( keys %{$ref_in} ) {
        $ref_out->{$_} = $ref_in->{$_};
    }
    return $ref_out;
}

sub vec_scalar_product {
    my $vec1 = shift;
    my $vec2 = shift;
    croak "Something is wrong --- the two vectors are of unequal length"
        unless @$vec1 == @$vec2;
    my $product;
    for my $i (0..@$vec1-1) {
        $product += $vec1->[$i] * $vec2->[$i];
    }
    return $product;
}

sub vec_magnitude {
    my $vec = shift;
    my $mag_squared = 0;
    foreach my $num (@$vec) {
        $mag_squared += $num ** 2;
    }
    return sqrt $mag_squared;
}

sub sum {
    my $vec = shift;
    my $result;
    for my $item (@$vec) {
        $result += $item;
    }
    return $result;
}

sub simple_stemmer {
    my $word = shift;
    my $debug = shift;
    print "\nStemming the word:        $word\n" if $debug;
    $word =~ s/(.*[a-z][^aeious])s$/$1/i;
    $word =~ s/(.*[a-z]s)es$/$1/i;
    $word =~ s/(.*[a-z][ck])es$/$1e/i;
    $word =~ s/(.*[a-z]+)tions$/$1tion/i;
    $word =~ s/(.*[a-z]+)mming$/$1m/i;



( run in 2.256 seconds using v1.01-cache-2.11-cpan-cdf2f3d4e48 )