AI-Classifier-Japanese

 view release on metacpan or  search on metacpan

Build.PL  view on Meta::CPAN


    
);
# Register a share directory with the build only when a `share` directory
# exists (the path is relative, so it is resolved against the current
# working directory).
if (-d 'share') {
    $args{share_dir} = 'share';
}

# Build through a Module::Build subclass named MyBuilder. The code string
# below overrides the `distmeta` and `installdeps` actions so that both die
# with a message pointing at the tool to use instead. The builder object is
# constructed from the accumulated %args.
my $builder = Module::Build->subclass(
    class => 'MyBuilder',
    code => q{
        sub ACTION_distmeta {
            die "Do not run distmeta. Install Minilla and `minil install` instead.\n";
        }
        sub ACTION_installdeps {
            die "Do not run installdeps. Run `cpanm --installdeps .` instead.\n";
        }
    }
)->new(%args);
# Emit the build script for this distribution.
$builder->create_build_script();

my $mbmeta = CPAN::Meta->load_file('MYMETA.json');
my $meta = CPAN::Meta->load_file('META.json');
my $prereqs_hash = CPAN::Meta::Prereqs->new(
    $meta->prereqs

cpanfile  view on Meta::CPAN

# Minimum Perl version required by the distribution.
requires 'perl', '5.008001';

# Runtime dependencies; no minimum versions are specified for these.
requires 'Mouse';
requires 'Algorithm::NaiveBayes';
requires 'Text::MeCab';

# Dependencies that are only needed in the 'test' phase.
on 'test' => sub {
    requires 'Test::More', '0.98';
    requires 'Test::File';
};

lib/AI/Classifier/Japanese.pm  view on Meta::CPAN

use warnings;

our $VERSION = "0.01";

use Mouse;
use Text::MeCab;
use Algorithm::NaiveBayes;

# The Algorithm::NaiveBayes model, held in a file-scoped lexical.
# NOTE(review): every method in this file reads or replaces this one variable
# and none of them stores anything on $self, so all objects of this class
# share the same model and training data; restore_state swaps it for all of
# them at once.
my $nb = Algorithm::NaiveBayes->new;

# Adds one labelled training example to the module-level classifier.
#
#   $text     - text to learn from; converted to a word => count hash by
#               _convert_text_to_bow
#   $category - label stored with the example
#
# Returns whatever $nb->add_instance returns.
sub add_training_text {
  my ($self, $text, $category) = @_;

  my $bag_of_words = _convert_text_to_bow($text);
  return $nb->add_instance(
    attributes => $bag_of_words,
    label      => $category,
  );
}

# Trains the module-level classifier on the instances added so far.
# Arguments (including the invocant) are not used. Returns whatever
# $nb->train returns.
sub train {
  return $nb->train();
}

# Returns the result of $nb->labels on the module-level classifier, evaluated
# in the caller's context. Arguments (including the invocant) are not used.
sub labels {
  return $nb->labels();
}

# Classifies $text with the module-level classifier.
#
# The text is converted to a word => count hash by _convert_text_to_bow and
# passed to $nb->predict as its `attributes`. The prediction is requested in
# scalar context and that single value is returned (presumably a reference
# to the per-label scores; confirm against Algorithm::NaiveBayes).
sub predict {
  my ($self, $text) = @_;

  my $bag_of_words = _convert_text_to_bow($text);
  my $prediction   = $nb->predict(attributes => $bag_of_words);
  return $prediction;
}

# Converts $text into a bag of words.
#
# The text is tokenised by _parse_text; the result is a reference to a new
# hash mapping each returned word to the number of times it occurred.
# Plain function: takes the text as its only argument (no invocant).
sub _convert_text_to_bow {
  my ($text) = @_;

  my $words = _parse_text($text);
  my %count_of;
  for my $word (@{$words}) {
    $count_of{$word}++;
  }
  return \%count_of;
}

# Extracts keyword surfaces from $text.
#
# A new Text::MeCab tagger parses the text; the resulting node chain is
# walked via ->next and the surface form of every node whose posid passes
# _is_keyword is collected, in order. Returns a reference to that array.
# Plain function: takes the text as its only argument (no invocant).
sub _parse_text {
  my ($text) = @_;

  my @keywords;
  my $tagger = Text::MeCab->new();
  for (my $node = $tagger->parse($text); $node; $node = $node->next) {
    push @keywords, $node->surface if _is_keyword($node->posid);
  }
  return \@keywords;
}

# Saves the module-level classifier to $path by delegating to
# $nb->save_state. Returns whatever that call returns.
sub save_state {
  my $self = shift;
  my ($path) = @_;

  return $nb->save_state($path);
}

# Replaces the module-level classifier with the one that
# Algorithm::NaiveBayes->restore_state loads from $path, and returns it.
sub restore_state {
  my $self = shift;
  my ($path) = @_;

  $nb = Algorithm::NaiveBayes->restore_state($path);
  return $nb;
}

# True when the given MeCab posid is one this module keeps as a keyword:
# a noun, a verb or an adjective, checked in that order. The value returned
# is that of the first predicate that succeeds, or of the last one tried.
sub _is_keyword {
  my ($pos_id) = @_;

  return _is_noun($pos_id)
      || _is_verb($pos_id)
      || _is_adj($pos_id);
}

# See: http://mecab.googlecode.com/svn/trunk/mecab/doc/posid.html
# True when the posid is 2, the id this module treats as an interjection.
sub _is_interjection {
  my ($pos_id) = @_;
  return $pos_id == 2;
}
# True when the posid lies in 10..12, the range this module treats as
# adjectives.
sub _is_adj {
  my ($pos_id) = @_;
  return $pos_id >= 10 && $pos_id < 13;
}
# True when the posid is 25, the id this module treats as an auxiliary.
sub _is_aux {
  my ($pos_id) = @_;
  return $pos_id == 25;
}
# True when the posid is 26, the id this module treats as a conjunction.
sub _is_conjunction {
  my ($pos_id) = @_;
  return $pos_id == 26;
}
# True when the posid lies in 27..30.
# NOTE(review): the name suggests particles, but in the MeCab IPA posid table
# 27..30 appear to be the prefix class while particles occupy 13..24 - confirm
# against the table before relying on this predicate.
sub _is_particls {
  my ($pos_id) = @_;
  return $pos_id >= 27 && $pos_id < 31;
}
# True when the posid lies in 31..33, the range this module treats as verbs.
sub _is_verb {
  my ($pos_id) = @_;
  return $pos_id >= 31 && $pos_id < 34;
}
# True when the posid lies in 36..67, the range this module treats as nouns.
sub _is_noun {
  my ($pos_id) = @_;
  return $pos_id >= 36 && $pos_id < 68;
}
# True when the posid is 68, the id this module treats as a prenominal
# adjective.
sub _is_prenominal_adj {
  my ($pos_id) = @_;
  return $pos_id == 68;
}

# Finalize this package's Mouse metaclass once all subs are defined.
__PACKAGE__->meta->make_immutable();

# A module file must end with a true value so that require/use succeeds.
1;
__END__

=encoding utf-8



( run in 0.285 second using v1.01-cache-2.11-cpan-a5abf4f5562 )