Algorithm-VSM
view release on metacpan or search on metacpan
lib/Algorithm/VSM.pm view on Meta::CPAN
print "\n\nMAP value: $self->{_map}\n\n";
}
# Prints, for every query, the stored precision and recall values at each
# rank, formatted to three decimal places.
#
# Dies if $self->{_precision_for_queries} holds no entries.  Queries are
# taken from the keys of $self->{_avg_precision_for_queries} and visited in
# ascending numeric order of whatever get_integer_suffix() returns for them.
sub display_precision_vs_recall_for_queries {
    my ($self) = @_;
    if (!scalar(keys %{$self->{_precision_for_queries}})) {
        die "You must first invoke precision_and_recall_calculator function";
    }
    print "\n\nDisplaying precision and recall values for different queries:\n\n";
    my @ordered_queries =
        sort { get_integer_suffix($a) <=> get_integer_suffix($b) }
        keys %{$self->{_avg_precision_for_queries}};
    for my $query_label (@ordered_queries) {
        print "\n\nQuery $query_label:\n";
        print "\n (The first value is for rank 1, the second value at rank 2, and so on.)\n\n";

        # Work on a private copy so the stored values are never reformatted.
        my @precision_at_rank = @{$self->{_precision_for_queries}->{$query_label}};
        $_ = sprintf("%.3f", $_) for @precision_at_rank;
        print " Precision at rank => @precision_at_rank\n";

        my @recall_at_rank = @{$self->{_recall_for_queries}->{$query_label}};
        $_ = sprintf("%.3f", $_) for @recall_at_rank;
        print "\n Recall at rank => @recall_at_rank\n";
    }
    print "\n\n";
}
# Returns a reference to an array of the stored average-precision values,
# each formatted with "%.3f".  The values are listed in ascending numeric
# order of what get_integer_suffix() returns for the corresponding query key.
#
# Dies if $self->{_avg_precision_for_queries} holds no entries.
sub get_query_sorted_average_precision_for_queries {
    my ($self) = @_;
    if (!scalar(keys %{$self->{_avg_precision_for_queries}})) {
        die "You must first invoke precision_and_recall_calculator function";
    }
    my @formatted_precisions =
        map  { sprintf "%.3f", $self->{_avg_precision_for_queries}->{$_} }
        sort { get_integer_suffix($a) <=> get_integer_suffix($b) }
        keys %{$self->{_avg_precision_for_queries}};
    return \@formatted_precisions;
}
################################### Utility Routines ###################################
# Checks a list of option names against the set of recognized names.
#
# Arguments: the option names to validate (a flat list of strings).
# Returns:   1 if every name is recognized, 0 as soon as one is not.
#
# An empty argument list contains no illegal names, so it yields 1.  (The
# earlier nested-loop version left its flag unassigned in that case and
# returned undef, which a caller comparing with "== 0" would misread as a
# failure.)
sub _check_for_illegal_params {
    my @params = @_;
    my %is_legal = map { ($_ => 1) } qw(
        corpus_directory
        corpus_vocab_db
        doc_vectors_db
        normalized_doc_vecs_db
        use_idf_filter
        stop_words_file
        file_types
        case_sensitive
        max_number_retrievals
        query_file
        relevancy_file
        min_word_length
        want_stemming
        lsa_svd_threshold
        relevancy_threshold
        break_camelcased_and_underscored
        save_model_on_disk
        debug
    );
    foreach my $param (@params) {
        # Stop at the first unrecognized name.
        return 0 unless exists $is_legal{$param};
    }
    return 1;
}
# Counts how many items of a list are string-equal (eq) to a given element.
#
# Arguments: the element to look for, followed by the list to search.
# Returns:   the number of matching items; 0 (false) when there is none, so
#            the result can be used directly as a membership test.
sub contained_in {
    my ($ele, @array) = @_;
    # grep in scalar context yields the match count directly.
    return scalar grep { $ele eq $_ } @array;
}
# Builds a new hash holding the same key/value pairs as the hash referenced
# by the argument and returns a reference to it.  Only the top level is
# copied: values that are themselves references are shared with the source,
# which is why this is meant only for an un-nested hash.
sub deep_copy_hash {
    my ($source) = @_;
    my %duplicate = map { ($_ => $source->{$_}) } keys %{$source};
    return \%duplicate;
}
# Computes the scalar (dot) product of two vectors.
#
# Arguments: two array references of numbers.
# Returns:   the sum of the element-wise products; 0 for two empty vectors.
# Croaks:    if the two arrays do not have the same number of elements.
sub vec_scalar_product {
    my ($vec1, $vec2) = @_;
    croak "Something is wrong --- the two vectors are of unequal length"
        unless @$vec1 == @$vec2;
    # Start from 0 so that empty vectors produce 0 rather than undef.
    my $product = 0;
    for my $i (0 .. $#{$vec1}) {
        $product += $vec1->[$i] * $vec2->[$i];
    }
    return $product;
}
# Returns the Euclidean length of a vector: the square root of the sum of
# the squares of its elements.  The argument is an array reference; an empty
# array gives 0.
sub vec_magnitude {
    my ($components) = @_;
    my $sum_of_squares = 0;
    $sum_of_squares += $_ ** 2 for @{$components};
    return sqrt($sum_of_squares);
}
# Adds up the elements of an array.
#
# Arguments: a reference to an array of numbers.
# Returns:   the total of the elements; 0 when the array is empty.
sub sum {
    my ($vec) = @_;
    # Start from 0 so that an empty array produces 0 rather than undef.
    my $result = 0;
    for my $item (@$vec) {
        $result += $item;
    }
    return $result;
}
sub simple_stemmer {
my $word = shift;
my $debug = shift;
print "\nStemming the word: $word\n" if $debug;
$word =~ s/(.*[a-z][^aeious])s$/$1/i;
$word =~ s/(.*[a-z]s)es$/$1/i;
$word =~ s/(.*[a-z][ck])es$/$1e/i;
$word =~ s/(.*[a-z]+)tions$/$1tion/i;
$word =~ s/(.*[a-z]+)mming$/$1m/i;
( run in 2.256 seconds using v1.01-cache-2.11-cpan-cdf2f3d4e48 )