Algorithm-VSM

 view release on metacpan or  search on metacpan

examples/README  view on Meta::CPAN

(7)  For your first experiments with measuring the accuracy of retrieval  
     performance, execute the script

             calculate_precision_and_recall_for_VSM.pl
   
     This script first tries to estimate the relevancy of each corpus file
     to each of the queries in the file 'test_queries.txt'.  Using those
     estimated relevancies, the module then calculates the two measures
     Precision@rank and Recall@rank.  The area under the Precision vs.
     Recall curve for each query is the accuracy of retrieval for that
     query.  Averaging this result over all the queries yields the more
     global metric MAP (Mean Average Precision).
     
     As mentioned elsewhere in the module documentation, estimating
     relevancies in the manner carried out by the module is not reliable.
     Relevancies are supposed to be supplied by humans.  All that a computer
     can do to estimate relevancies is to count the number of query words in
     a document.  But measuring relevancies in this manner creates a
     circular dependency between the retrieval algorithm and the estimated
     relevancy, because the retrieval algorithm also ranks documents by the
     query words they contain.

examples/corpus_with_java_and_cpp/VirtualPrint2.cc  view on Meta::CPAN

    ~Manager(){}                                                  //(D)
};

int main()
{
    vector<Employee*> empList;

    Employee* e1 = new Employee( "mister", "bigshot", "megaCorp" );
    Employee* e2 = new Employee( "ms", "importante", "devourCorp" );
    Employee* m3 = new Manager("mister", "biggun", "plunderCorp" , 2);
    Employee* m4 = new Manager("ms", "shiningstar", "globalCorp", 2);  

    empList.push_back( e1 );
    empList.push_back( e2 );
    empList.push_back( m3 );
    empList.push_back( m4 );

    vector<Employee*>::iterator p = empList.begin();
    while ( p < empList.end() ) {                                 //(E)
        (*p++)->print();                                          //(F)
        cout << endl;

lib/Algorithm/VSM.pm  view on Meta::CPAN

    print "Scanning the directory '$self->{_corpus_directory}' for\n" .
        "  model construction\n\n" if $self->{_debug};
    $self->_scan_directory( $self->{_corpus_directory} );
    $self->_drop_stop_words() if $self->{_stop_words_file};
    if ($self->{_debug}) {
        foreach ( sort keys %{$self->{_vocab_hist_on_disk}} ) {               
            printf( "%s\t%d\n", $_, $self->{_vocab_hist_on_disk}->{$_} );    
        }
    }
    if ($self->{_save_model_on_disk}) {
        unlink glob "$self->{_corpus_vocab_db}.*";   
        unlink glob "$self->{_doc_vectors_db}.*";   
        unlink glob "$self->{_normalized_doc_vecs_db}.*";   
        tie %{$self->{_vocab_hist_on_disk}}, 'SDBM_File',  
                 $self->{_corpus_vocab_db}, O_RDWR|O_CREAT, 0640
                or die "Can't create DBM files: $!";       
        foreach (keys %{$self->{_vocab_hist}}) {
            $self->{_vocab_hist_on_disk}->{$_} = $self->{_vocab_hist}->{$_};
        }
        untie %{$self->{_vocab_hist_on_disk}};
    }
    $self->{_corpus_vocab_done} = 1;
    $self->{_vocab_size} = scalar( keys %{$self->{_vocab_hist}} );

lib/Algorithm/VSM.pm  view on Meta::CPAN

    print "\n\n";
}

###############################    Directory Scanner      ################################

sub _scan_directory {
    my $self = shift;
    my $dir = rel2abs( shift );
    my $current_dir = cwd;
    chdir $dir or die "Unable to change directory to $dir: $!";
    foreach ( glob "*" ) {                                            
        if ( -d and !(-l) ) {
            $self->_scan_directory( $_ );
            chdir $dir                                                
                or die "Unable to change directory to $dir: $!";
        } elsif (-r _ and 
                 -T _ and 
                 -M _ > 0.00001 and  # modification age is at least 1 sec
                !( -l $_ ) and 
                $self->ok_to_filetype($_) ) {
            $self->_scan_file_for_rels($_) if $self->{_scan_dir_for_rels};



( run in 0.551 second using v1.01-cache-2.11-cpan-49f99fa48dc )