Algorithm-LDA

 view release on metacpan or  search on metacpan

lib/Algorithm/LDA.pm  view on Meta::CPAN

=cut

# add(data => \@words)
#
# Registers one document with the model. Every word in the supplied list is
# wrapped in a record holding the new document's ID, a randomly drawn topic
# in the range 0 .. $K-1, and the word itself. Each record is then counted
# through increaseMap and the finished list is appended to $self->documents.
#
# Returns 1 on success, or nothing when valid() rejects the data.
sub add
{
    my (%args) = @_;

    if (!valid($args{data}))
    {
        return;
    }

    # The new document's ID is the number of documents stored so far.
    my $document_id = scalar @{$self->documents};

    # First pass: draw a starting topic for every word, in input order.
    my @data_list;
    foreach my $word (@{$args{data}})
    {
        my %entry = (
            document => $document_id,
            topic    => int(rand($K)),
            word     => $word,
        );
        push(@data_list, \%entry);
    }

    # Second pass: count each (document, topic, word) assignment.
    foreach my $index (0 .. $#data_list)
    {
        my $entry = $data_list[$index];
        $self->increaseMap($document_id, $entry->{topic}, $entry->{word});
    }

    push(@{$self->documents}, \@data_list);

    return 1;
}

=head3 init

description:

 Initializes alpha, initializes beta, loads documents, starts main loop

input:   

 None

output:

 1

example:

 init();

=cut

# Initialization method: loads the documents, fills @alpha with the default
# $alpha value, calls beta(), then runs the main loop for $maxIterations
# iterations. Each iteration shuffles the documents, re-samples the topic of
# every word, and prints the results. Takes no arguments; always returns 1.
sub init 
{    
    #Load Documents
    load();
    
    #Initialize @alpha to default value
    # NOTE(review): this fills indices 0..$k inclusive, while add() draws
    # topics with rand($K) -- confirm $k is the intended upper bound.
    $alpha[$_] = $alpha for(0..$k);

    #Randomly initialize beta distribution
    beta();
    
    #Start Main loop
    for my $iter (1..$maxIterations) 
    {   
        #Calculate and print percentage completed
        # Named $percent rather than $a: a lexical $a would hide the
        # special variable that sort() comparison blocks rely on.
        my $percent = $iter * 100 / $maxIterations;
        print "Iteration: $iter   |   $percent% Completed...\n";
        
        #Shuffle Documents
        @{$self->documents} = shuffle(@{$self->documents});
        
        #Loop through each word in each document and sample its topic
        for my $document (@{$self->documents}) 
        {
            # $document is an array ref, so interpolating it directly would
            # print "ARRAY(0x...)". Report the document ID carried by its
            # word records instead (the array position changes on shuffle).
            my $document_id = @{$document} ? $document->[0]{document} : '(empty)';
            print STDERR "Processing Document $document_id\n";

            for my $data (@$document) 
            {
                # Remove the word's current assignment from the counts,
                # pick a new topic, then count the new assignment.
                $self->decreaseMap($data->{document}, $data->{topic}, $data->{word});
                $data->{topic} = $self->sample_topic($data->{document}, $data->{word});
                $self->increaseMap($data->{document}, $data->{topic}, $data->{word});
            }
        }
        
        #print results for this iteration
        printResults($iter);
    }
        
    return 1;
}

=head3 printResults

description:

 Prints words in each topic, topics in each document, phi values, 
 and theta values to text files in the 'Results/$data' directory

input:   

 $iter <- number of the iteration whose results are being printed

output:

 None

example:

 printResults($iter);

=cut

#Creates four files in "Results/$data"
    # Documents.$data.txt - topic distribution for each document
    # Topics.$data.txt - word distribution for each topic
    # phi.$data.txt - Phi values per topic
    # theta.$data.txt - Theta values per document

sub printResults
{
    print STDERR "Printing Results\n";

    my $iter = shift; 

    if(! (-e "Results")) { 
	system "mkdir Results"; 



( run in 1.659 second using v1.01-cache-2.11-cpan-39bf76dae61 )