Algorithm-LDA

 view release on metacpan or  search on metacpan

INSTALL  view on Meta::CPAN

TESTING PLATFORMS
    Algorithm-LDA has been developed and tested on Linux primarily using
    Perl, NetBeans 8.1 and the Bash shell.

REQUIREMENTS
    Algorithm-LDA REQUIRES that the following software be installed. More
    details on how to obtain and install appear below.

    --Programming Languages Perl (version 5.8.5 or better)

    --CPAN modules JSON::XS List::Util List::MoreUtils Class::Accessor::Fast

PROGRAMMING LANGUAGES
  Perl (version 5.8.5 or better)
    Perl is freely available at:

    <http://www.perl.org>

    It is very likely that you will already have Perl installed if you
    are using a Unix/Linux based system.

CPAN MODULES
  JSON::XS
    Please see the JSON::XS documentation.

  List::Util
    Please see the List::Util documentation.

  List::MoreUtils
    Please see the List::MoreUtils documentation.

  Class::Accessor::Fast
    Please see the Class::Accessor::Fast documentation.

MANIFEST  view on Meta::CPAN

bin/lda.pl
CHANGES
INSTALL
lib/Algorithm/LDA.pm
Makefile.PL
MANIFEST			This list of files
README
t/pod-coverage.t
t/pod.t
META.yml                                 Module YAML meta-data (added by MakeMaker)
META.json                                Module JSON meta-data (added by MakeMaker)

META.json  view on Meta::CPAN

         }
      },
      "configure" : {
         "requires" : {
            "ExtUtils::MakeMaker" : "0"
         }
      },
      "runtime" : {
         "requires" : {
            "Class::Accessor::Fast" : "0",
            "JSON::XS" : "0",
            "List::MoreUtils" : "0",
            "List::Util" : "0"
         }
      }
   },
   "release_status" : "stable",
   "version" : "0.03",
   "x_serialization_backend" : "JSON::PP version 2.27300"
}

META.yml  view on Meta::CPAN

meta-spec:
  url: http://module-build.sourceforge.net/META-spec-v1.4.html
  version: '1.4'
name: Algorithm-LDA
no_index:
  directory:
    - t
    - inc
requires:
  Class::Accessor::Fast: '0'
  JSON::XS: '0'
  List::MoreUtils: '0'
  List::Util: '0'
version: '0.03'
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'

Makefile.PL  view on Meta::CPAN


use strict;
use warnings;

use ExtUtils::MakeMaker;

# Author strings that are joined into the AUTHOR field below.
my $author1 = 'Bridget McInnes <btmcinnes@vcu.edu>';
my $author2 = 'Nick Jordan';

# Generate the Makefile for the Algorithm::LDA distribution.
WriteMakefile(
    NAME         => 'Algorithm::LDA',

    # The distribution version is read from the module's $VERSION.
    VERSION_FROM => 'lib/Algorithm/LDA.pm',

    # Runtime prerequisites; 0 means "any version".
    PREREQ_PM => {
        'JSON::XS'              => 0,
        'List::Util'            => 0,
        'List::MoreUtils'       => 0,
        'Class::Accessor::Fast' => 0,
    },

    # Command-line script installed alongside the module.
    EXE_FILES => ['bin/lda.pl'],

    # Settings used when building the release tarball.
    dist => { 'COMPRESS' => 'gzip -9f', 'SUFFIX' => 'gz' },

    # AUTHOR is only passed when running under perl 5.005 or newer.
    (
        $] >= 5.005
        ? ( AUTHOR => "$author1, $author2" )
        : ()
    ),
);

bin/lda.pl  view on Meta::CPAN

}

# $stop - Stopword list (regex)
# Taken from the next positional argument first; a defined $opt_stop
# then replaces it.
my $stop = shift;
$stop = $opt_stop if defined $opt_stop;

# TODO -- add as options
# $maxIterations - Maximum Iterations (1000 unless $opt_iterations is defined)
# $updateCorpus - 1 = Force update documents, 0 = allow loading from JSON
# $wordThreshold - Minimum number of documents a word must appear in
# $alpha - Default Alpha value
# $numWords - Number of words per topic
my $updateCorpus  = 0;
my $wordThreshold = 10;
my $alpha         = 0.1;
my $maxIterations = defined $opt_iterations ? $opt_iterations : 1000;

lib/Algorithm/LDA.pm  view on Meta::CPAN

use strict;
use 5.006;
use strict;
use warnings FATAL => 'all';

use constant pi => 4*atan2(1, 1);
use constant e  => exp(1);
use parent qw/Class::Accessor::Fast/;
use List::Util qw(shuffle sum max);
use List::MoreUtils qw(uniq first_index);
use JSON::XS;


# Package version of this module. Declared with `our` (available since the
# perl 5.006 this file requires) instead of the obsolete `use vars` pragma.
our $VERSION = '0.03';


#Used for accessing $self->documents
__PACKAGE__->mk_accessors(qw/documents/);

lib/Algorithm/LDA.pm  view on Meta::CPAN


# $V - vocabulary size
# $v - $V-1 (for convenience)
# @alpha - array of alpha values (parameter of topic distribution)
# @theta - array of theta values (topic distribution)
# @beta - array of beta values (parameter of word distribution)
# @phi - array of phi values (word distribution)

# $totalDocs - Total Documents (Only used for computing completeness when loading)
# $maxIterations - Maximum Iterations
# $updateCorpus - 1 = Force update documents, 0 = allow loading from JSON
# $threshold - Minimum number of documents a word must appear in
# $numWords - Number of words per topic
# $alpha - Default alpha value

# $documentNum - Number of documents


my $data;
my $docs;
my $stop; 

lib/Algorithm/LDA.pm  view on Meta::CPAN

    }
    
    close($fh3);
    close($fh4);
}

=head3 load

description:

 Loads documents from text files (in "data/$data") or JSON file (in "Documents")

input:   

 None

output:

 None

example:

 load();

=cut

#Loads document data from files or JSON

sub load
{    
    #open data directory
    opendir(DH, "$docs"); 
    my @files = grep { $_ ne '.' and $_ ne '..' } readdir DH;
    closedir(DH);
    
    #array holding string of words in each document
    my @documents1 = ();

lib/Algorithm/LDA.pm  view on Meta::CPAN

    #Convert words to hashmap (for use of "exists") and remove unclean 
    # words from documents array
    my %h;
    @h{@words} = ();
    for my $i (0..$#documents)
    {
        @{$documents[$i]} = grep{exists $h{$_}} @{$documents[$i]};
    }

    
    open(my $fh, '>', "JSON") or die "Could not open file 'JSON' $!";
	
    foreach my $i (@documents)
    {
	print $fh "{\"data\":[\"" . join("\", \"", @{$i})."\"]}\n";
    }
    close $fh;
	
   
        open(my $fh2, '<', "JSON") or die "Could not open file 'JSON' $!";
        while (my $line = <$fh2>) {
            my $obj = decode_json($line);
            add(%$obj);
        }
        close $fh2;
}


=head3 wordsPerTopic
    



( run in 1.604 second using v1.01-cache-2.11-cpan-140bd7fdf52 )