Algorithm-LDA

 view release on metacpan or  search on metacpan

lib/Algorithm/LDA.pm  view on Meta::CPAN

459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
        $map{$wrd}++;
    }
}
 
#Prune the word list. A word is discarded when any of these holds:
#  - its count in %map is greater than half the number of documents
#  - its count in %map is less than or equal to $threshold
#  - it is three characters long or shorter
#NOTE(review): %map is filled above this excerpt; presumably it holds a
#per-word document count -- confirm against the loop that populates it.
my $D = @documents;
my $max_count = 0.5*$D;

@words = grep {
    my $count = $map{$_};
    !($count > $max_count || $count <= $threshold || length($_) <= 3)
} @words;

#Keep a single copy of every surviving word; presumably this cleaned list
#is what %vocabulary gets repopulated from further down.
@words = uniq(@words);

lib/Algorithm/LDA.pm  view on Meta::CPAN

929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
    }
 
    #remove the leading and trailing slash (/) delimiters from the pattern
    s/^\///;
    s/\/$//;
     
    #form a single big regex
    $stop_regex.="(".$_.")|";
}
 
#Abort if no pattern was collected into $stop_regex. The message goes to
#STDERR with a trailing newline, and the process exits with a non-zero
#status so that callers and shells can detect the failure.
if(!defined $stop_regex || length($stop_regex)<=0)
{
    print STDERR "No valid Perl Regular Expression found in Stop file $stop\n";
    exit 1;
}
 
#Every pattern was appended as "(...)|", so the combined regex ends with a
#dangling "|" alternation separator; strip that last character.
chop $stop_regex;
 
# making AND a default stop mode
if(!defined $stop_mode)
{



( run in 0.252 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )