Algorithm-LDA

 view release on metacpan or  search on metacpan

lib/Algorithm/LDA.pm  view on Meta::CPAN

            $map{$wrd}++; 
        }
    }
    
    #Remove words that appear in more than half of the corpus, or in $threshold or fewer documents
    #Also remove words of three letters or fewer
    my $D = @documents;
    for my $wd (0..$#words) 
    {
        my $times = $map{$words[$wd]};
        my $test = ($times > 0.5*$D  || $times<=$threshold || length($words[$wd]) <=3);    
	
        if($test)   
        {   
            $words[$wd]=0;
        }
    }
    
    #Repopulate %vocabulary with cleaned words
    @words = grep { $_ } (@words);
    @words = uniq(@words);

lib/Algorithm/LDA.pm  view on Meta::CPAN

        }

        #remove the leading and trailing slashes
        s/^\///;
        s/\/$//;
        
	#form a single big regex
        $stop_regex.="(".$_.")|";
    }

    if(length($stop_regex)<=0) 
    {
	print STDERR "No valid Perl Regular Experssion found in Stop file $stop";
	exit;
    }
    
    chop $stop_regex;
    
    # making AND a default stop mode
    if(!defined $stop_mode) 
    {



( run in 0.225 second using v1.01-cache-2.11-cpan-3cd7ad12f66 )