Algorithm-VSM

 view release on metacpan or  search on metacpan

examples/continuously_running_VSM_retrieval_engine.pl  view on Meta::CPAN

#!/usr/bin/perl -w

##  continuously_running_VSM_retrieval_engine.pl 

##  This script puts the VSM-based retrieval in an infinite loop so that
##  a user can repeatedly ask for retrievals for different query strings.

##  See Item 2 of the README of the `examples' directory for further info

use strict;
use Algorithm::VSM;

# Inputs for the retrieval engine.  The stop-words file will typically
# include the keywords of the programming language(s) used in the software.
my $corpus_dir      = "corpus";
my $stop_words_file = "stop_words.txt";

# Constructor options, collected as an ordered key/value list so they can be
# read (and edited) in one place before the engine object is created.
my @vsm_options = (
    break_camelcased_and_underscored => 1,                   # default: 1
    case_sensitive                   => 0,                   # default: 0
    corpus_directory                 => $corpus_dir,
    file_types                       => ['.txt', '.java'],
    max_number_retrievals            => 10,
    min_word_length                  => 4,
    stop_words_file                  => $stop_words_file,
    use_idf_filter                   => 1,
    want_stemming                    => 1,                   # default: 0
);

my $vsm = Algorithm::VSM->new(@vsm_options);

# These two calls are made exactly once, ahead of the query loop, so that
# every subsequent query reuses the same already-prepared $vsm object.
$vsm->get_corpus_vocabulary_and_word_counts();
$vsm->generate_document_vectors();

# Interactive query loop.
#
# Each pass prompts for a query on STDIN, splits it into whitespace-separated
# words and hands the word list to $vsm->retrieve_with_vsm().  The call is
# wrapped in eval so that an exception raised for one query is printed and
# the loop moves on to the next prompt instead of killing the script.
#
# The loop ends (normally, without an error status) when the user submits an
# empty/blank line or when STDIN reaches end-of-input.
while (1) {
    print "\nEnter your query in the next line (or just press `Enter' to exit):\n\n";
    my $query_string = <STDIN>;

    # <STDIN> returns undef at end-of-input (Ctrl-D, exhausted pipe).  Treat
    # that like an empty line so we neither warn on undef nor spin forever.
    $query_string = '' unless defined $query_string;

    # Trim surrounding whitespace; \s also covers a trailing "\n" or "\r\n".
    $query_string =~ s/\A\s+|\s+\z//g;

    # A blank query is the documented way to quit.  This is a normal exit,
    # so do not die() and do not report the (unrelated) contents of $!.
    if (length($query_string) == 0) {
        print "... exiting\n";
        last;
    }

    # split ' ' breaks on runs of whitespace and never yields empty fields,
    # so no truthiness filter is needed (one would wrongly drop a "0" word).
    my @query = split ' ', $query_string;

    my $retrievals = eval {
        $vsm->retrieve_with_vsm( \@query );
    };
    if ($@) {
        print "$@\n";
    } else {
        $vsm->display_retrievals( $retrievals );
    }
}



( run in 0.608 second using v1.01-cache-2.11-cpan-39bf76dae61 )