Lingua-FreeLing3
view release on metacpan - search on metacpan
view release on metacpan or search on metacpan
lib/Lingua/FreeLing3/HMMTagger.pm view on Meta::CPAN
States whether words that carry retokenization information (e.g. set
by the dictionary or affix handling modules) must be retokenized (that
is, splitted in two or more words) after the tagging.
=item C<AmbiguityResolution> (option)
States whether and when the tagger must select only one analysis in
case of ambiguity. Possible values are: FORCE_NONE: no selection
forced, words ambiguous after the tagger, remain
ambiguous. FORCE_TAGGER: force selection immediately after tagging,
and before retokenization. FORCE_RETOK: force selection after
retokenization.
=item C<KBest> (integer)
This option, only available with FreeLing 3.1, states how many best
tag sequences the tagger must try to compute. If not specified, this
parameter defaults to 1. Since a sentence may have less possible tag
sequences than the given k value, the results may contain a number of
sequences smaller than k.
=back
=cut
sub new {
my ($class, $lang, %ops) = @_;
my $config = Lingua::FreeLing3::Config->new($lang);
my $file = $config->config("TaggerHMMFile");
unless (-f $file) {
carp "Cannot find hmm_tagger data file. Tried [$file]\n";
return undef;
}
my $retok = Lingua::FreeLing3::_validate_bool($ops{Retokenize},
$config->config('TaggerRetokenize')); # bool
my $ft = $config->config("TaggerForceSelect");
$ft = "FORCE_NONE" if $ft eq "none";
$ft = "FORCE_TAGGER" if $ft eq "tagger";
$ft = "FORCE_RETOK" if $ft eq "retok";
my $amb = Lingua::FreeLing3::_validate_option($ops{AmbiguityResolution},
{
FORCE_NONE => 0,
FORCE_TAGGER => 1,
FORCE_RETOK => 2,
}, $ft);
my $kbest = $ops{KBest} || 1;
my $self;
if (Lingua::FreeLing3::ConfigData->config("fl_minor") == 0) {
$self = $class->SUPER::new($lang, $file, $retok, $amb);
} else {
$self = $class->SUPER::new($file, $retok, $amb, $kbest);
}
return bless $self => $class
}
=head2 C<tag>
Alias to C<analyze>
=cut
sub tag { &analyze }
=head2 C<analyze>
Receives a list of sentences, and returns that same list of sentences
after tagging process. Basically, selected the most probable
(accordingly with the tagger model) analysis for each word.
=cut
sub analyze {
my ($self, $sentences, %opts) = @_;
unless (Lingua::FreeLing3::_is_sentence_list($sentences)) {
carp "Error: analyze argument isn't a list of sentences";
return undef;
}
$sentences = $self->SUPER::analyze($sentences);
for my $s (@$sentences) {
$s->ACQUIRE();
$s = Lingua::FreeLing3::Sentence->_new_from_binding($s);
}
return $sentences;
}
1;
__END__
=head1 SEE ALSO
Lingua::FreeLing3 (3), freeling, perl(1)
=head1 AUTHOR
Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>
Jorge Cunha Mendes E<lt>jorgecunhamendes@gmail.comE<gt>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2011-2013 by Projecto Natura
=cut
view all matches for this distributionview release on metacpan - search on metacpan
( run in 0.721 second using v1.00-cache-2.02-grep-82fe00e-cpan-3b7f77b76a6c )