Algorithm-LDA
view release on metacpan or search on metacpan
lib/Algorithm/LDA.pm view on Meta::CPAN
$map{$wrd}++;
}
}
#Remove words whose count in %map exceeds half the number of documents, or is at most $threshold
#Also remove words of three letters or fewer
my $D = @documents;
#Blank out (set to 0) every entry of @words that fails the frequency or length filter
for my $word (@words)
{
#Count recorded for this word in %map
my $count = $map{$word};
#$word aliases the array element, so the assignment below overwrites the slot in @words
if($count > 0.5*$D || $count<=$threshold || length($word) <=3)
{
$word=0;
}
}
#Repopulate %vocabulary with cleaned words
@words = grep { $_ } (@words);
@words = uniq(@words);
lib/Algorithm/LDA.pm view on Meta::CPAN
}
#remove the / s from beginning and end
s/^\///;
s/\/$//;
#form a single big regex
$stop_regex.="(".$_.")|";
}
#Abort when no pattern was accumulated: $stop_regex is still empty at this point
if(length($stop_regex)<=0)
{
#Trailing newline keeps the diagnostic on its own line
print STDERR "No valid Perl Regular Expression found in Stop file $stop\n";
#Exit with a non-zero status so the calling process can detect the failure
exit 1;
}
chop $stop_regex;
# making AND a default stop mode
if(!defined $stop_mode)
{
( run in 0.225 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )