Algorithm-LDA

 view release on metacpan or  search on metacpan

lib/Algorithm/LDA.pm  view on Meta::CPAN

# Module-wide working state (file-scoped lexicals).
# NOTE(review): the roles of %map, $V, $v and the four arrays are not visible
# from this part of the file -- confirm against the code that uses them.
my %map;
my ($V, $v);
my (@alpha, @theta, @beta, @phi);

# Run-time settings; new() fills each of these from its arguments.
my ($totalDocs, $maxIterations, $updateCorpus, $threshold, $numWords, $alpha);

# Counter that starts out at zero.
my $documentNum = 0;

# The one shared object: new() stores the instance here and add() reads it
# rather than receiving an invocant.
my $self;

# Constructor.
#
# Positional arguments after the class name, in this order:
#   data, numTopics, maxIterations, totalDocs, updateCorpus,
#   wordThreshold, alpha, numWords, stop
#
# Each argument is stored on the object under an underscore-prefixed key and
# is also copied into a file-level variable.  The object itself is kept in
# the file-level $self, init() is run, and the blessed object is returned.
sub new
{
    my ($class, @params) = @_;

    # Object keys, listed in the same order as the positional arguments.
    my @slots = qw(
        _data _numTopics _maxIterations _totalDocs _updateCorpus
        _wordThreshold _alpha _numWords _stop
    );

    # Empty containers every object starts with.
    my %object = (
        docs               => [],
        document_topic_map => {},
        topic_word_map     => {},
        document_map       => {},
        topic_map          => {},
        word_map           => {},
    );

    # Missing trailing arguments simply end up undefined.
    @object{@slots} = @params[0 .. $#slots];

    $self = bless \%object, $class;

    # Copy the settings into the file-level variables.
    $docs = $data  = $object{_data};
    $K             = $object{_numTopics};
    $k             = $K - 1;
    $maxIterations = $object{_maxIterations};
    $totalDocs     = $object{_totalDocs};
    $updateCorpus  = $object{_updateCorpus};
    $threshold     = $object{_wordThreshold};
    $alpha         = $object{_alpha};
    $numWords      = $object{_numWords};
    $stop          = $object{_stop};

    # Start with an empty document list.
    $self->{documents} = [];

    init();

    return $self;
}

=head3 add

description:

 Used to add to array of documents ($self->documents)

input:   

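 %args <- hash whose 'data' key holds a reference to an array of words

output:

 1 on success; returns nothing if the data does not pass validation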

example:

 while (my $line = <$fh2>) {
    my $obj = decode_json($line);
    add(%$obj);
 }

=cut

# Appends one document to the list returned by $self->documents.
# Every word in $args{data} becomes a record holding the new document's id,
# a randomly drawn topic and the word itself; each record is also passed to
# increaseMap.  Returns 1, or nothing when the data fails valid().
sub add
{
    my %args = @_;

    # Nothing to do when the payload does not validate.
    if (!valid($args{data})) {
        return;
    }

    # The new document's id is the number of documents stored so far.
    my $document_id = scalar @{ $self->documents };

    # Build every record first, then count them in a second pass.
    my @records;
    for my $word (@{ $args{data} }) {
        push @records, {
            document => $document_id,
            topic    => int(rand($K)),
            word     => $word,
        };
    }

    for my $record (@records) {
        $self->increaseMap($document_id, $record->{topic}, $record->{word});
    }

    push @{ $self->documents }, \@records;

    return 1;
}

=head3 init

description:

 Initializes alpha, initializes beta, loads documents, starts main loop

input:   



( run in 2.664 seconds using v1.01-cache-2.11-cpan-140bd7fdf52 )