Algorithm-VSM
view release on metacpan or search on metacpan
examples/README view on Meta::CPAN
(7) For your first experiments with measuring the accuracy of retrieval
performance, execute the script
calculate_precision_and_recall_for_VSM.pl
This script first tries to estimate the relevancies of the corpus
files to each of the queries in the file 'test_queries.txt'. The
module calculates the two measures Precision@rank and Recall@rank.
The area under the Precision vs. Recall curve for each query is the
accuracy of retrieval for that query, i.e., its Average Precision.
Averaging this result over all the queries yields the more global
metric MAP (Mean Average Precision).
As mentioned elsewhere in the module documentation, estimating
relevancies in the manner carried out by the module is not safe.
Relevancies are supposed to be supplied by humans. All that a computer
can do to estimate relevancies is to count the number of query words in a
document. But measuring relevancies in this manner creates a circular
dependency between the retrieval algorithm and the estimated
relevancy.
examples/corpus_with_java_and_cpp/VirtualPrint2.cc view on Meta::CPAN
~Manager(){} //(D)
};
int main()
{
vector<Employee*> empList;
Employee* e1 = new Employee( "mister", "bigshot", "megaCorp" );
Employee* e2 = new Employee( "ms", "importante", "devourCorp" );
Employee* m3 = new Manager("mister", "biggun", "plunderCorp" , 2);
Employee* m4 = new Manager("ms", "shiningstar", "globalCorp", 2);
empList.push_back( e1 );
empList.push_back( e2 );
empList.push_back( m3 );
empList.push_back( m4 );
vector<Employee*>::iterator p = empList.begin();
while ( p < empList.end() ) { //(E)
(*p++)->print(); //(F)
cout << endl;
lib/Algorithm/VSM.pm view on Meta::CPAN
print "Scanning the directory '$self->{_corpus_directory}' for\n" .
" model construction\n\n" if $self->{_debug};
$self->_scan_directory( $self->{_corpus_directory} );
$self->_drop_stop_words() if $self->{_stop_words_file};
if ($self->{_debug}) {
foreach ( sort keys %{$self->{_vocab_hist_on_disk}} ) {
printf( "%s\t%d\n", $_, $self->{_vocab_hist_on_disk}->{$_} );
}
}
if ($self->{_save_model_on_disk}) {
unlink glob "$self->{_corpus_vocab_db}.*";
unlink glob "$self->{_doc_vectors_db}.*";
unlink glob "$self->{_normalized_doc_vecs_db}.*";
tie %{$self->{_vocab_hist_on_disk}}, 'SDBM_File',
$self->{_corpus_vocab_db}, O_RDWR|O_CREAT, 0640
or die "Can't create DBM files: $!";
foreach (keys %{$self->{_vocab_hist}}) {
$self->{_vocab_hist_on_disk}->{$_} = $self->{_vocab_hist}->{$_};
}
untie %{$self->{_vocab_hist_on_disk}};
}
$self->{_corpus_vocab_done} = 1;
$self->{_vocab_size} = scalar( keys %{$self->{_vocab_hist}} );
lib/Algorithm/VSM.pm view on Meta::CPAN
print "\n\n";
}
############################### Directory Scanner ################################
sub _scan_directory {
my $self = shift;
my $dir = rel2abs( shift );
my $current_dir = cwd;
chdir $dir or die "Unable to change directory to $dir: $!";
foreach ( glob "*" ) {
if ( -d and !(-l) ) {
$self->_scan_directory( $_ );
chdir $dir
or die "Unable to change directory to $dir: $!";
} elsif (-r _ and
-T _ and
-M _ > 0.00001 and # modification age is at least 1 sec
!( -l $_ ) and
$self->ok_to_filetype($_) ) {
$self->_scan_file_for_rels($_) if $self->{_scan_dir_for_rels};
( run in 0.551 second using v1.01-cache-2.11-cpan-49f99fa48dc )