AI-Classifier-Japanese
view release on metacpan or search on metacpan
);
# Pass share/ to the builder as share_dir only when that directory exists.
if (-d 'share') {
$args{share_dir} = 'share';
}
# Build through an ad-hoc Module::Build subclass named MyBuilder. The code
# string below overrides the distmeta and installdeps actions so that each
# one dies with a message naming the command to run instead.
my $builder = Module::Build->subclass(
class => 'MyBuilder',
code => q{
sub ACTION_distmeta {
die "Do not run distmeta. Install Minilla and `minil install` instead.\n";
}
sub ACTION_installdeps {
die "Do not run installdeps. Run `cpanm --installdeps .` instead.\n";
}
}
)->new(%args);
# Generate the Build script from the configured builder.
$builder->create_build_script();
my $mbmeta = CPAN::Meta->load_file('MYMETA.json');
my $meta = CPAN::Meta->load_file('META.json');
my $prereqs_hash = CPAN::Meta::Prereqs->new(
$meta->prereqs
# Runtime prerequisites.
requires 'perl' => '5.008001';
requires 'Mouse';
requires 'Algorithm::NaiveBayes';
requires 'Text::MeCab';

# Prerequisites that are only needed for the test phase.
on test => sub {
    requires 'Test::More' => '0.98';
    requires 'Test::File';
};
lib/AI/Classifier/Japanese.pm view on Meta::CPAN
use warnings;
our $VERSION = "0.01";
use Mouse;
use Text::MeCab;
use Algorithm::NaiveBayes;
# The classifier lives in a file-scoped lexical rather than in the object:
# every sub in this file reads (or, in restore_state, replaces) this one
# variable, so all instances of the class share the same classifier.
my $nb = Algorithm::NaiveBayes->new;
# Convert $text into a word-frequency hash and add it to the shared
# classifier as a training instance labelled $category.
#
#   $text     - text to learn from
#   $category - label to attach to that text
#
# Returns whatever the classifier's add_instance call returns.
sub add_training_text {
    my ($self, $text, $category) = @_;

    my $bag_of_words = _convert_text_to_bow($text);

    return $nb->add_instance(
        attributes => $bag_of_words,
        label      => $category,
    );
}
# Run training on the shared classifier and hand back the result of that
# call unchanged.
sub train {
    return $nb->train;
}
# Return the labels reported by the shared classifier, exactly as its
# labels method returns them.
sub labels {
    return $nb->labels;
}
# Classify $text with the shared classifier.
#
#   $text - text to classify
#
# The text is converted to a word-frequency hash and passed to the
# classifier's predict method (called in scalar context); that single
# result value is returned.
sub predict {
    my ($self, $text) = @_;

    my $bag_of_words = _convert_text_to_bow($text);
    my $prediction   = $nb->predict(attributes => $bag_of_words);

    return $prediction;
}
# Build a bag-of-words for $text: a hash reference mapping each word
# returned by _parse_text to the number of times it occurs.
sub _convert_text_to_bow {
    my ($text) = @_;

    my %count_of;
    $count_of{$_}++ for @{ _parse_text($text) };

    return \%count_of;
}
# Parse $text with a fresh Text::MeCab instance and collect the surface
# form of every node whose posid passes _is_keyword.
#
# Returns an array reference of surface strings in node order.
sub _parse_text {
    my ($text) = @_;

    my $tagger = Text::MeCab->new();
    my @keywords;

    # Follow the node chain from the first node until next() yields a
    # false value.
    for (my $node = $tagger->parse($text); $node; $node = $node->next) {
        next unless _is_keyword($node->posid);
        push @keywords, $node->surface;
    }

    return \@keywords;
}
# Ask the shared classifier to save its state to $path and return the
# result of that call.
#
#   $path - destination handed straight to the classifier's save_state
sub save_state {
    my ($self, $path) = @_;

    return $nb->save_state($path);
}
# Replace the shared classifier with one restored from $path.
#
#   $path - location handed to Algorithm::NaiveBayes->restore_state
#
# Returns the newly restored classifier.
sub restore_state {
    my ($self, $path) = @_;

    $nb = Algorithm::NaiveBayes->restore_state($path);

    return $nb;
}
# A posid counts as a keyword when it is classified as a noun, a verb or
# an adjective. The checks run in that order and stop at the first match.
sub _is_keyword {
    my ($posid) = @_;

    return _is_noun($posid)
        || _is_verb($posid)
        || _is_adj($posid);
}
# See: http://mecab.googlecode.com/svn/trunk/mecab/doc/posid.html
# True when the given posid is exactly 2.
sub _is_interjection {
    my ($posid) = @_;

    return $posid == 2;
}
# True when the given posid lies in the half-open range [10, 13).
sub _is_adj {
    my ($posid) = @_;

    return $posid >= 10 && $posid < 13;
}
# True when the given posid is exactly 25.
sub _is_aux {
    my ($posid) = @_;

    return $posid == 25;
}
# True when the given posid is exactly 26.
sub _is_conjunction {
    my ($posid) = @_;

    return $posid == 26;
}
# True when the given posid lies in the half-open range [27, 31).
#
# NOTE(review): the name suggests "particles", but in the posid table
# this file links to, ids 27-30 look like the prefix classes, with
# particles at 13-24 -- confirm against the table before relying on
# this sub.
sub _is_particls {
    my ($posid) = @_;

    return $posid >= 27 && $posid < 31;
}
# True when the given posid lies in the half-open range [31, 34).
sub _is_verb {
    my ($posid) = @_;

    return $posid >= 31 && $posid < 34;
}
# True when the given posid lies in the half-open range [36, 68).
sub _is_noun {
    my ($posid) = @_;

    return $posid >= 36 && $posid < 68;
}
# True when the given posid is exactly 68.
sub _is_prenominal_adj {
    my ($posid) = @_;

    return $posid == 68;
}
# Mark this package's metaclass as immutable now that all subs are defined.
__PACKAGE__->meta->make_immutable();
# A module file must finish by evaluating to a true value.
1;
__END__
=encoding utf-8
( run in 0.285 second using v1.01-cache-2.11-cpan-a5abf4f5562 )