Acme-Samurai

 view release on metacpan or  search on metacpan

lib/Acme/Samurai.pm  view on Meta::CPAN

package Acme::Samurai;
use 5.010001;
use strict;
use warnings;
use utf8;
our $VERSION = '0.04';

use File::ShareDir qw/dist_file/;
use Lingua::JA::Alphabet::Yomi qw/alphabet2yomi/;
use Lingua::JA::Numbers qw/num2ja/;
use Unicode::Japanese qw/unijp/;

use Text::Mecabist;

# gozaru($text)
#
# Class-method entry point: Acme::Samurai->gozaru($text).
#
# Blesses a throwaway instance into the invoking class, parses $text with
# Text::Mecabist (using the user dictionary shipped in this distribution's
# share directory), hands every node the parser passes to the callback to
# apply_rules(), and returns whatever finalize() produces for the parsed
# document. An undefined $text is treated as the empty string.
sub gozaru {
    my ($class, $text) = @_;
    my $self = bless {}, $class;
    $text //= "";

    # Every node kind (regular, unknown, BOS, EOS) shares one output format.
    my $format = '%m,%H';

    # The user dictionary file is named after the encoding Text::Mecabist
    # reports, e.g. "<encoding-name>.dic".
    my $dic_name = Text::Mecabist->encoding->name . '.dic';

    my $parser = Text::Mecabist->new({
        node_format => $format,
        unk_format  => $format,
        bos_format  => $format,
        eos_format  => $format,
        userdic     => dist_file('Acme-Samurai', $dic_name),
    });

    # Normalise character widths before parsing. Going by the
    # Unicode::Japanese method names: digits to half-width, Latin letters
    # to full-width -- confirm against that module's documentation.
    my $normalized = unijp($text)->z2hNum->h2zAlpha->getu;

    # The callback closes over $self so the rules run as a method call.
    my $on_node = sub {
        my ($node) = @_;
        $self->apply_rules($node);
    };
    my $doc = $parser->parse($normalized, $on_node);

    return $self->finalize($doc);
}

sub apply_rules {
    my ($self, $node) = @_;
    
    return if not $node->readable;
    
    my $text = $node->text;

    # one to one custom dictionary
    if ($node->extra) {
        $text = $node->extra;
    }
    
    if ($node->is('名詞') or $node->is('記号')) {
        
        # arabic number to kanji
        if ($node->pos1 eq 'æ•°' and $node->surface =~ /^[0-9]+$/) {
            # no 位
            if ($node->surface =~ /^0/ or
                $node->prev && $node->prev->surface =~ /[..]/) {
                
                $text = join "", map { num2ja($_) } split //, $node->surface;
            } else {
                $text = num2ja($node->surface); # with 位
            }
        }
        
        # kanji number to more classic
        elsif ($node->pos1 eq 'æ•°') {
            $text =~ tr{〇一二三四五六七八九十百万}
                       {零壱弐参四伍六七八九拾佰萬};
        }
        
        # roman
        elsif ($text =~ /^\p{Latin}+$/) {
            $text = $node->pronunciation if $node->pronunciation;
            $text = alphabet2yomi($text, 'en');
            $text = unijp($text)->kata2hira->getu;
        }
    }    



( run in 2.684 seconds using v1.01-cache-2.11-cpan-0d23b851a93 )