Acme-Samurai
view release on metacpan or search on metacpan
lib/Acme/Samurai.pm view on Meta::CPAN
package Acme::Samurai;
use 5.010001;
use strict;
use warnings;
use utf8;
our $VERSION = '0.04';
use File::ShareDir qw/dist_file/;
use Lingua::JA::Alphabet::Yomi qw/alphabet2yomi/;
use Lingua::JA::Numbers qw/num2ja/;
use Unicode::Japanese qw/unijp/;
use Text::Mecabist;
# Class-method entry point: rewrite $text by parsing it with Text::Mecabist,
# passing each node handed to the parse callback through apply_rules(), and
# returning whatever finalize() produces for the parsed document.
#
#   invocant - class name; a fresh empty object is blessed into it per call
#   text     - input string; undef is treated as the empty string
sub gozaru {
    my ($class, $input) = @_;
    my $self = bless {}, $class;
    $input //= "";

    # All four node kinds (regular, unknown, BOS, EOS) share one output format.
    my $format = '%m,%H';

    # The user dictionary is looked up in the distribution's share directory,
    # named after the encoding that Text::Mecabist reports.
    my $mecab = Text::Mecabist->new({
        node_format => $format,
        unk_format  => $format,
        bos_format  => $format,
        eos_format  => $format,
        userdic     => dist_file('Acme-Samurai', Text::Mecabist->encoding->name . '.dic'),
    });

    # Normalize the input via Unicode::Japanese before parsing
    # (z2hNum, then h2zAlpha; getu yields the resulting string).
    my $normalized = unijp($input)->z2hNum->h2zAlpha->getu;

    # Callback given to parse(): delegate every node it receives to apply_rules().
    my $visit = sub {
        my ($node) = @_;
        $self->apply_rules($node);
    };

    my $doc = $mecab->parse($normalized, $visit);

    return $self->finalize($doc);
}
sub apply_rules {
my ($self, $node) = @_;
return if not $node->readable;
my $text = $node->text;
# one to one custom dictionary
if ($node->extra) {
$text = $node->extra;
}
if ($node->is('åè©') or $node->is('è¨å·')) {
# arabic number to kanji
if ($node->pos1 eq 'æ°' and $node->surface =~ /^[0-9]+$/) {
# no 位 (place-value units): spell the number out digit by digit
if ($node->surface =~ /^0/ or
$node->prev && $node->prev->surface =~ /[.ï¼]/) {
$text = join "", map { num2ja($_) } split //, $node->surface;
} else {
$text = num2ja($node->surface); # with ä½
}
}
# kanji number to more classic
elsif ($node->pos1 eq 'æ°') {
$text =~ tr{ãä¸äºä¸åäºå
ä¸å
«ä¹åç¾ä¸}
{é¶å£±å¼ååä¼å
ä¸å
«ä¹æ¾ä½°è¬};
}
# roman
elsif ($text =~ /^\p{Latin}+$/) {
$text = $node->pronunciation if $node->pronunciation;
$text = alphabet2yomi($text, 'en');
$text = unijp($text)->kata2hira->getu;
}
}
( run in 2.684 seconds using v1.01-cache-2.11-cpan-0d23b851a93 )