Algorithm-LDA
view release on metacpan or search on metacpan
lib/Algorithm/LDA.pm view on Meta::CPAN
# File-level state that is not referenced in the visible part of this file.
# NOTE(review): presumably the vocabulary and the LDA parameter arrays
# (alpha/theta/beta/phi) -- confirm against init() and the sampling code.
my (%map, $V, $v);
my (@alpha, @theta, @beta, @phi);

# Run settings; new() copies each of these from its constructor arguments.
my ($totalDocs, $maxIterations, $updateCorpus, $threshold, $numWords, $alpha);

# Document counter, starting at zero.
my $documentNum = 0;

# The most recently constructed object: assigned by new(), read by add().
my $self;
# Constructor.
#
# Positional arguments after the class name, in order:
#   data, number of topics, max iterations, total docs, update-corpus flag,
#   word threshold, alpha, number of words, stop.
# Missing trailing arguments are stored as undef.
#
# Besides building the object, this copies the settings into file-level
# variables and stores the object itself in the file-level $self, so each
# call replaces the state used by the plain-function API (e.g. add()).
# Calls init() before returning the blessed object.
sub new
{
    my ($class, $input, $num_topics, $max_iter, $total_docs, $update_corpus,
        $word_threshold, $alpha_arg, $num_words, $stop_arg) = @_;

    # Deliberately the file-level $self, not a fresh lexical.
    $self = {
        _data              => $input,
        _numTopics         => $num_topics,
        _maxIterations     => $max_iter,
        _totalDocs         => $total_docs,
        _updateCorpus      => $update_corpus,
        _wordThreshold     => $word_threshold,
        _alpha             => $alpha_arg,
        _numWords          => $num_words,
        _stop              => $stop_arg,
        docs               => [],
        document_topic_map => {},
        topic_word_map     => {},
        document_map       => {},
        topic_map          => {},
        word_map           => {},
    };
    $self->{documents} = [];

    # Mirror the settings into the file-level variables.
    # $docs and $data both refer to the same input.
    $docs = $input;
    $data = $input;
    $K    = $num_topics;
    $k    = $K - 1;

    $maxIterations = $max_iter;
    $totalDocs     = $total_docs;
    $updateCorpus  = $update_corpus;
    $threshold     = $word_threshold;
    $alpha         = $alpha_arg;
    $numWords      = $num_words;
    $stop          = $stop_arg;

    bless $self, $class;
    init();
    return $self;
}
=head3 add
description:
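Appends one document to the array of documents ($self->documents), assigning each of its words a random topic
input:
%args <- hash whose 'data' key holds an array reference of the document's words
output:
1 on success; returns nothing if the data does not pass valid()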
example:
while (my $line = <$fh2>) {
my $obj = decode_json($line);
add(%$obj);
}
=cut
# Appends one document to the array of documents ($self->documents).
#
# Called as a plain function: every argument is part of the %args key/value
# list, and the object used is the file-level $self.
#
# Each word in @{ $args{data} } becomes a record holding the document's
# index, a random topic drawn with int(rand($K)), and the word itself.
# Every record is registered through increaseMap() before the document is
# stored.
#
# Returns 1 on success; returns nothing when valid($args{data}) is false.
sub add
{
    my %args = @_;

    if (!valid($args{data}))
    {
        return;
    }

    # The new document's id is the number of documents stored so far.
    my $doc_index = scalar @{ $self->documents };

    # Build every token first, then update the maps, so that all random
    # draws happen before any increaseMap() call.
    my @tokens;
    for my $word (@{ $args{data} })
    {
        push @tokens,
            +{ document => $doc_index, topic => int(rand($K)), word => $word };
    }

    $self->increaseMap($doc_index, $_->{topic}, $_->{word}) for @tokens;

    push @{ $self->documents }, \@tokens;
    return 1;
}
=head3 init
description:
Initializes alpha, initializes beta, loads documents, starts main loop
input:
( run in 2.664 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )