Freq
view release on metacpan or search on metacpan
# Read the "conf" file found in directory $path and return its settings.
#
# Arguments:
#   $path - directory that holds the "conf" file.
#
# Returns:
#   A hash reference mapping each key to its value.  Every line is expected
#   to look like "key:value".  Lines starting with "#" and empty lines are
#   ignored.  A line without a colon yields the key with an undef value.
#
# Dies if the file cannot be opened; the message names the file.
sub _configure {
    my $path = shift;
    my $self = {};

    # File "conf" contains seg_max_words, nwords.
    my $conf_file = "$path/conf";

    # Three-argument open with a lexical handle: the open mode can never be
    # taken from the contents of $path, and the handle is not a package
    # global shared with the rest of the program.
    open my $conf_fh, '<', $conf_file
        or die "Cannot open $conf_file: $!";

    # Read into a lexical so the caller's $_ is left untouched.
    while (my $line = <$conf_fh>) {
        next if $line =~ m|^#|;
        chomp $line;

        # A blank line would otherwise store undef under the key ''.
        next if $line eq '';

        # LIMIT 2 splits at the first colon only, so a value may itself
        # contain colons (e.g. a URL or a time).
        my ($key, $value) = split m|:|, $line, 2;
        $self->{$key} = $value;
    }
    close $conf_fh;

    return $self;
}
sub optimize_index {
bin/tokenize-sb view on Meta::CPAN
print "\n<DOC>\n<DOCNO>$1</DOCNO>\n";
}
else {
next;
}
$_ = $1 if /<TEXT>(.+?)<\/TEXT>/ms;
s|<[^>]+>||g; # Get rid of all other tags.
#s|(\d)| $1 |g; # Count each digit.
s|_| |g; # underscores bah!
$_ = lc $_;
my @tokens = split m|\b|;
my $offset = 0;
# if the first token is a word, start the $offset early
unless($tokens[0] =~ /\w/){
$offset = length($tokens[0]);
shift @tokens;
}
my @locations = ();
while(@tokens){
my $word = shift @tokens;
my $junk = shift @tokens;
( run in 1.363 second using v1.01-cache-2.11-cpan-71847e10f99 )