Algorithm-LDA
view release on metacpan or search on metacpan
lib/Algorithm/LDA.pm view on Meta::CPAN
=cut
#Used to add a document to the array of documents ($self->documents)
#Stores every word of the document with its document ID and a random topic
sub add
{
    # Registers one new document with the model.
    #
    # Arguments (named, passed as a flat key/value list):
    #   data - array reference holding the words of the document
    #
    # Returns 1 once the document has been stored; returns nothing when
    # valid() rejects the data.
    #
    # Relies on names declared elsewhere in this file: $self, $K, valid().
    my %params = @_;
    my $words  = $params{data};

    unless (valid($words))
    {
        return;
    }

    # The new document's ID is the number of documents stored so far.
    my $new_id = scalar @{$self->documents};

    # First pass: tag every word with the document ID and a random topic
    # drawn with int(rand($K)).
    my @entries;
    foreach my $word (@{$words})
    {
        my $topic = int(rand($K));
        push @entries, +{ document => $new_id, topic => $topic, word => $word };
    }

    # Second pass: record each (document, topic, word) assignment through
    # increaseMap. Kept separate from the first pass so all random draws
    # happen before any map update.
    foreach my $entry (@entries)
    {
        $self->increaseMap($new_id, $entry->{topic}, $entry->{word});
    }

    push @{$self->documents}, \@entries;

    return 1;
}
=head3 init
description:
Loads documents, initializes alpha, initializes beta, then runs the main sampling loop
input:
None
output:
1
example:
init();
=cut
#Initialization Method
sub init
{
    # Entry point of the sampler. Loads the documents, fills @alpha with the
    # default $alpha value, initializes beta, then runs $maxIterations
    # passes. In each pass the documents are shuffled and every word entry
    # has its topic resampled; results are printed after each pass.
    #
    # Takes no arguments. Returns 1.
    #
    # Relies on names declared elsewhere in this file: $self, @alpha, $alpha,
    # $k, $maxIterations, load(), beta(), printResults() and shuffle().

    #Load Documents
    load();

    #Initialize @alpha to default value
    # NOTE(review): 0..$k fills $k + 1 slots. If $k is the number of topics
    # this is one more than needed -- confirm. Also note add() draws topics
    # with rand($K) (upper case) while this loop uses $k; verify both
    # variables are intended.
    $alpha[$_] = $alpha for (0..$k);

    #Randomly initialize beta distribution
    beta();

    #Start Main loop
    for my $iter (1..$maxIterations)
    {
        #Calculate and print percentage completed
        # Named $percent rather than $a: a lexical $a hides the package
        # variable that sort() comparison blocks rely on.
        my $percent = $iter * 100 / $maxIterations;
        print "Iteration: $iter | ${percent}% Completed...\n";

        #Shuffle Documents
        @{$self->documents} = shuffle(@{$self->documents});

        #Loop through each word in each document and sample its topic
        for my $document (@{$self->documents})
        {
            # $document is an array reference, so interpolating it directly
            # would print "ARRAY(0x...)". Report the document ID carried by
            # its word entries instead (every entry of a document stores the
            # same ID).
            my $document_id = @$document ? $document->[0]{document} : '(empty)';
            print STDERR "Processing Document $document_id\n";

            for my $data (@$document)
            {
                # Remove the word's current assignment, draw a new topic,
                # then record the new assignment.
                $self->decreaseMap($data->{document}, $data->{topic}, $data->{word});
                $data->{topic} = $self->sample_topic($data->{document}, $data->{word});
                $self->increaseMap($data->{document}, $data->{topic}, $data->{word});
            }
        }

        #print results for this iteration
        printResults($iter);
    }

    return 1;
}
=head3 printResults
description:
Prints words in each topic, topics in each document, phi values,
and theta values to text files in the 'Results/$data' directory
input:
$iter - the number of the iteration whose results are being printed
output:
None
example:
printResults($iter);
=cut
#Creates four files in "Results/$data"
# Documents.$data.txt - topic distribution for each document
# Topics.$data.txt - word distribution for each topic
# phi.$data.txt - Phi values per topic
# theta.$data.txt - Theta values per document
sub printResults
{
print STDERR "Printing Results\n";
my $iter = shift;
if(! (-e "Results")) {
system "mkdir Results";
( run in 1.659 second using v1.01-cache-2.11-cpan-39bf76dae61 )