Freq

 view release on metacpan or  search on metacpan

Freq.pm  view on Meta::CPAN

# Read the configuration stored in the file "$path/conf".
#
# The file holds one "key:value" pair per line (e.g. seg_max_words, nwords).
# Lines beginning with '#' are comments; blank lines are ignored.
#
# Parameters:
#   $path - directory that contains the "conf" file.
# Returns:
#   A hash reference mapping each key to its value.  A line without a ':'
#   stores its whole text as a key whose value is undef.
# Dies:
#   If "$path/conf" cannot be opened; the message names the file.
sub _configure {
    my $path = shift;
    my $self = {};

    # File "conf" contains seg_max_words, nwords.

    my $conf_file = "$path/conf";

    # Three-argument open keeps the file name out of the mode string, and a
    # lexical handle avoids sharing the package-global CONF handle.
    open my $conf_fh, '<', $conf_file
        or die "Cannot open $conf_file: $!";

    # Read into a lexical so the caller's $_ is left untouched.
    while (my $line = <$conf_fh>) {
        next if $line =~ m|^#|;
        chomp $line;

        # A blank line would otherwise be stored under an undefined key.
        next unless $line =~ m|\S|;

        my ($key, $value) = split m|:|, $line;

        # A line made only of ':' splits into nothing at all.
        next unless defined $key;

        $self->{$key} = $value;
    }
    close $conf_fh;

    return $self;
}



sub optimize_index {

bin/tokenize-sb  view on Meta::CPAN

        print "\n<DOC>\n<DOCNO>$1</DOCNO>\n";
    }
    else {
        next; 
    }
    $_ = $1 if /<TEXT>(.+?)<\/TEXT>/ms;
    s|<[^>]+>||g; # Get rid of all other tags.
    #s|(\d)| $1 |g; # Count each digit.
    s|_| |g;       # underscores bah!
    $_ = lc $_;
    my @tokens = split m|\b|;
    my $offset = 0;
    # If the text starts with a non-word token, skip it and begin $offset after it.
    unless($tokens[0] =~ /\w/){
        $offset = length($tokens[0]);
        shift @tokens;
    }
    my @locations = ();
    while(@tokens){
        my $word = shift @tokens;
        my $junk = shift @tokens;



( run in 0.918 second using v1.01-cache-2.11-cpan-71847e10f99 )