Algorithm-VSM

 view release on metacpan or  search on metacpan

examples/retrieve_with_disk_based_VSM.pl  view on Meta::CPAN

#!/usr/bin/perl -w

##  retrieve_with_disk_based_VSM.pl

use strict;
use Algorithm::VSM;

##  You must first run the script:

##      retrieve_with_VSM_and_also_create_disk_based_model.pl

##  before executing the current script.  The script named above results in
##  the creation of a disk-based VSM model that can be used by the
##  current script for retrieval.

##  See Item 4 of the README of the `examples' directory


##  Banner: spell out the model-building step this script relies on.  The
##  pieces are handed to print as a list; the emitted text is unchanged.
print "\nIMPORTANT:  We assume that you have previously called\n\n",
      "   retrieve_with_VSM_and_also_create_disk_based_model.pl\n\n",
      "on the same corpus with the following constructor options:\n\n",
      "   use_idf_filter  => 1,                 \n",
      "   save_model_on_disk  => 1,           \n\n";

##  The words that make up the retrieval query.
my @query_words = qw(
    string getAllChars throw IOException
    distinct TreeMap histogram map
);

##  The three disk-based databases named in the constructor call below are
##  created by running the script
##
##      retrieve_with_VSM_and_also_create_disk_based_model.pl
##
##  The first stores the corpus vocabulary, the second the term frequencies
##  for the vocabulary words, and the third the normalized document
##  vectors.  As to what is meant by normalization, see the comments in the
##  script retrieve_with_VSM.pl.
my $retriever = Algorithm::VSM->new(
    corpus_vocab_db        => "corpus_vocab_db",
    doc_vectors_db         => "doc_vectors_db",
    normalized_doc_vecs_db => "normalized_doc_vecs_db",
    max_number_retrievals  => 10,
);

##  Use the following call ONLY if you are setting the use_idf_filter
##  option to 0 in the constructor:
#$retriever->upload_vsm_model_from_disk();

##  Load the normalized model that was previously saved on disk.
$retriever->upload_normalized_vsm_model_from_disk();

##  Uncomment the following statement if you would like to see the corpus
##  vocabulary:
#$retriever->display_corpus_vocab();

##  Use the following call ONLY if you are setting the use_idf_filter
##  option to 0 in the constructor:
#$retriever->display_doc_vectors();

##  Uncomment the following statement if you would like to see the
##  individual normalized document vectors:
#$retriever->display_normalized_doc_vectors();

##  Run the query against the uploaded model and show what comes back.
##  The result is captured in a scalar first so that retrieve_with_vsm()
##  is invoked in scalar context.
my $ranked_docs = $retriever->retrieve_with_vsm( \@query_words );

$retriever->display_retrievals( $ranked_docs );



( run in 0.878 second using v1.01-cache-2.11-cpan-39bf76dae61 )