Lingua-TFIDF
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Lingua/TFIDF/WordSegmenter/LetterNgram.pm view on Meta::CPAN
package Lingua::TFIDF::WordSegmenter::LetterNgram;
# ABSTRACT: Letter N-gram segmenter
use strict;
use warnings;
use Carp qw//;
use Smart::Args;
# Constructor: Lingua::TFIDF::WordSegmenter::LetterNgram->new(n => $n)
#
# Arguments are declared through Smart::Args' `args`:
#   $class - the invocant, typed 'ClassName'
#   $n     - the N-gram width in letters, typed 'Int'
#
# Croaks with 'Word length must be 1+.' when $n is zero or negative.
# Returns a hash-based object holding the width under the key 'n'.
sub new {
    args
        my $class => 'ClassName',
        my $n     => 'Int';

    # An N-gram needs at least one letter, so anything below 1 is rejected.
    Carp::croak('Word length must be 1+.') unless $n > 0;

    my %fields = (n => $n);
    return bless \%fields, $class;
}
# Read-only accessor: returns the value stored under the 'n' key of the object.
sub n {
    my ($self) = @_;
    return $self->{n};
}
# segment($document)
#
# Builds an iterator over the letter N-grams of $document.
#
#   $document - either a plain string or a reference (declared 'Ref | Str').
#               A plain string is copied and wrapped in a scalar reference;
#               a reference is dereferenced as a scalar as-is.
#
# Returns a code reference. Each call yields the next substring of
# $self->n characters, advancing one character at a time and skipping any
# window that contains whitespace. Once no full-width window remains, the
# iterator returns nothing (undef in scalar context).
sub segment {
    args_pos
        my $self,
        my $document => 'Ref | Str';

    # Work through a scalar reference regardless of how the text was passed.
    my $text_ref = ref $document ? $document : \"$document";
    my $width    = $self->n;

    # Largest start offset at which a full window still fits. The length is
    # measured once, here, not on every iterator call.
    my $last_start = length($$text_ref) - $width;
    my $cursor     = 0;

    return sub {
        while ($cursor <= $last_start) {
            my $candidate = substr $$text_ref, $cursor, $width;
            $cursor++;

            # Windows touching whitespace are not emitted; slide on instead.
            next if $candidate =~ /\s/;
            return $candidate;
        }
        return;
    };
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Lingua::TFIDF::WordSegmenter::LetterNgram - Letter N-gram segmenter
=head1 VERSION
version 0.01
=head1 SYNOPSIS
use Lingua::TFIDF::WordSegmenter::LetterNgram;
my $segmenter = Lingua::TFIDF::WordSegmenter::LetterNgram->new(n => 2);
my $iter = $segmenter->segment('ロンドン橋落ちた　落ちた　落ちた...');
while (defined(my $word = $iter->())) { ... }
=head1 DESCRIPTION
This class provides an N-gram word segmenter.
=head1 METHODS
=head2 new(n => $n)
Constructor.
view all matches for this distributionview release on metacpan - search on metacpan
( run in 1.505 second using v1.00-cache-2.02-grep-82fe00e-cpan-48ebf85a1963 )