Algorithm-LDA
view release on metacpan or search on metacpan
lib/Algorithm/LDA.pm view on Meta::CPAN
459460461462463464465466467468469470471472473474475476477478479
$map
{
$wrd
}++;
}
}
# Prune @words in one pass. A word is dropped when its count in %map is
# greater than half the number of documents, when that count is at most
# $threshold, or when the word is three characters long or shorter.
my $D = @documents;    # array in scalar context: number of documents
@words = grep {
    my $count = $map{$_};
    !(     $count > 0.5 * $D
        || $count <= $threshold
        || length($_) <= 3 )
} @words;
# Collapse duplicates so every surviving word is listed once.
# NOTE(review): presumably %vocabulary is rebuilt from @words after this
# point -- that code is not visible here, confirm.
@words = uniq(@words);
lib/Algorithm/LDA.pm view on Meta::CPAN
929930931932933934935936937938939940941942943944945946947948949
}
#remove the / s from beginning and end
s/^\///;
s/\/$//;
#form a single big regex
$stop_regex
.=
"("
.
$_
.
")|"
;
}
# Bail out when no pattern was collected: $stop_regex is still empty, so
# there is nothing to build a stop list from.
if ( length($stop_regex) <= 0 ) {
    # The statement needs an explicit `print`; a bare filehandle followed
    # by a string does not compile.
    print STDERR "No valid Perl Regular Experssion found in Stop file $stop";
    # NOTE(review): a bare `exit` reports status 0 even though this is an
    # error path; left as is so callers see the same exit status.
    exit;
}
# Every pattern was appended as "(...)|", so the string ends with a
# dangling "|" -- remove that last character.
chop $stop_regex;
# making AND a default stop mode
if
(!
defined
$stop_mode
)
{
( run in 0.252 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )