Algorithm-LDA
view release on metacpan or search on metacpan
TESTING PLATFORMS
Algorithm-LDA has been developed and tested on Linux primarily using
Perl, NetBeans 8.1 and the Bash shell.
REQUIREMENTS
Algorithm-LDA REQUIRES that the following software be installed. More
details on how to obtain and install appear below.
--Programming Languages Perl (version 5.8.5 or better)
--CPAN modules JSON::XS List::Util List::MoreUtils Class::Accessor::Fast
PROGRAMMING LANGUAGES
Perl (version 5.8.5 or better)
Perl is freely available at:
<http://www.perl.org>
It is very likely that you will already have Perl installed if you
are using a Unix/Linux based system.
CPAN MODULES
JSON::XS
Please see the JSON::XS documentation.
List::Util
Please see the List::Util documentation.
List::MoreUtils
Please see the List::MoreUtils documentation.
Class::Accessor::Fast
Please see the Class::Accessor::Fast documentation.
bin/lda.pl
CHANGES
INSTALL
lib/Algorithm/LDA.pm
Makefile.PL
MANIFEST This list of files
README
t/pod-coverage.t
t/pod.t
META.yml Module YAML meta-data (added by MakeMaker)
META.json Module JSON meta-data (added by MakeMaker)
}
},
"configure" : {
"requires" : {
"ExtUtils::MakeMaker" : "0"
}
},
"runtime" : {
"requires" : {
"Class::Accessor::Fast" : "0",
"JSON::XS" : "0",
"List::MoreUtils" : "0",
"List::Util" : "0"
}
}
},
"release_status" : "stable",
"version" : "0.03",
"x_serialization_backend" : "JSON::PP version 2.27300"
}
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
name: Algorithm-LDA
no_index:
directory:
- t
- inc
requires:
Class::Accessor::Fast: '0'
JSON::XS: '0'
List::MoreUtils: '0'
List::Util: '0'
version: '0.03'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
Makefile.PL view on Meta::CPAN
# Build script for the Algorithm-LDA distribution.
# Running it generates the Makefile used to build, test and install
# lib/Algorithm/LDA.pm together with the bin/lda.pl script.
use strict;
use warnings;
use ExtUtils::MakeMaker;

# Author strings that end up in the generated distribution metadata.
# The e-mail address must be wrapped in a balanced <...> pair.
my $author1 = 'Bridget McInnes <btmcinnes@vcu.edu>';
my $author2 = 'Nick Jordan';

WriteMakefile(
    NAME         => 'Algorithm::LDA',
    # The distribution version is read from $VERSION in the module itself.
    VERSION_FROM => 'lib/Algorithm/LDA.pm',
    # Runtime prerequisites; 0 means "any version".
    PREREQ_PM    => {
        'JSON::XS'              => 0,
        'List::Util'            => 0,
        'List::MoreUtils'       => 0,
        'Class::Accessor::Fast' => 0,
    },
    # Scripts installed alongside the module.
    EXE_FILES    => ['bin/lda.pl'],
    dist         => { 'COMPRESS' => 'gzip -9f', 'SUFFIX' => 'gz' },
    # AUTHOR is only passed when running on Perl 5.005 or newer.
    ( $] >= 5.005
        ? ( AUTHOR => "$author1, $author2" )
        : () ),
);
}
# $stop - Stopword list (regex)
# Starts out as the next shifted argument; a defined $opt_stop overrides it.
my $stop = shift;
$stop = $opt_stop if defined $opt_stop;

# TODO -- add as options
# Tunable parameters:
#   $maxIterations - Maximum Iterations
#   $updateCorpus  - 1 = Force update documents, 0 = allow loading from JSON
#   $wordThreshold - Minimum number of documents a word must appear in
#   $alpha         - Default Alpha value
#   $numWords      - Number of words per topic

# Falls back to 1000 iterations unless $opt_iterations is defined.
my $maxIterations = defined $opt_iterations ? $opt_iterations : 1000;

my $updateCorpus  = 0;
my $wordThreshold = 10;
my $alpha         = 0.1;
lib/Algorithm/LDA.pm view on Meta::CPAN
use strict;
use 5.006;
use strict;
use warnings FATAL => 'all';
use constant pi => 4*atan2(1, 1);
use constant e => exp(1);
use parent qw/Class::Accessor::Fast/;
use List::Util qw(shuffle sum max);
use List::MoreUtils qw(uniq first_index);
use JSON::XS;
use vars qw($VERSION);
$VERSION = '0.03';
#Used for accessing $self->documents
__PACKAGE__->mk_accessors(qw/documents/);
lib/Algorithm/LDA.pm view on Meta::CPAN
# $V - vocabulary size
# $v - $V-1 (for convenience)
# @alpha - array of alpha values (parameter of topic distribution)
# @theta - array of theta values (topic distribution)
# @beta - array of beta values (parameter of word distribution)
# @phi - array of phi values (word distribution)
# $totalDocs - Total Documents (Only used for computing completeness when loading)
# $maxIterations - Maximum Iterations
# $updateCorpus - 1 = Force update documents, 0 = allow loading from JSON
# $threshold - Minimum number of documents a word must appear in
# $numWords - Number of words per topic
# $alpha - Default alpha value
# $documentNum - Number of documents
my $data;
my $docs;
my $stop;
lib/Algorithm/LDA.pm view on Meta::CPAN
}
close($fh3);
close($fh4);
}
=head3 load
description:
Loads documents from text files (in "data/$data") or JSON file (in "Documents")
input:
None
output:
None
example:
load();
=cut
#Loads document data from files or JSON
sub load
{
#open data directory
opendir(DH, "$docs");
my @files = grep { $_ ne '.' and $_ ne '..' } readdir DH;
closedir(DH);
#array holding string of words in each document
my @documents1 = ();
lib/Algorithm/LDA.pm view on Meta::CPAN
#Convert words to hashmap (for use of "exists") and remove unclean
# words from documents array
my %h;
@h{@words} = ();
for my $i (0..$#documents)
{
@{$documents[$i]} = grep{exists $h{$_}} @{$documents[$i]};
}
open(my $fh, '>', "JSON") or die "Could not open file 'JSON' $!";
foreach my $i (@documents)
{
print $fh "{\"data\":[\"" . join("\", \"", @{$i})."\"]}\n";
}
close $fh;
open(my $fh2, '<', "JSON") or die "Could not open file 'JSON' $!";
while (my $line = <$fh2>) {
my $obj = decode_json($line);
add(%$obj);
}
close $fh2;
}
=head3 wordsPerTopic
( run in 0.977 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )