Acme-Shukugawa-Atom

 view release on metacpan or  search on metacpan

lib/Acme/Shukugawa/Atom.pm  view on Meta::CPAN

# $Id: /mirror/coderepos/lang/perl/Acme-Shukugawa-Atom/trunk/lib/Acme/Shukugawa/Atom.pm 47728 2008-03-14T01:07:28.622095Z daisuke  $

package Acme::Shukugawa::Atom;
use strict;
use warnings;
use base qw(Class::Accessor::Fast);
use utf8;
use Encode qw(decode_utf8);
use File::ShareDir;
use Text::MeCab;
use YAML ();

our $VERSION = '0.00004';

__PACKAGE__->mk_accessors($_) for qw(custom_words);

# Special-case word table and shared regex fragments, built once at compile
# time.  TODO: move the regex fragments into the share file as well.
our ($CONFIG, @DEFAULT_WORDS, $RE_EXCEPTION, $RE_SMALL, $RE_SYLLABLE, $RE_NBAR);
BEGIN
{
    # $CONFIG is read while this module is being compiled, so only a value
    # assigned before that point can override the bundled config.yaml.
    my $config = YAML::LoadFile( 
        $CONFIG || File::ShareDir::module_file(__PACKAGE__, 'config.yaml') );

    # This file is compiled under "use utf8", so the literals below are
    # already character strings.  They must not be passed through
    # decode_utf8(): current Encode releases croak with "Cannot decode string
    # with wide characters" on such input, while older releases returned it
    # unchanged -- so using the literals directly is equivalent and safe.
    $RE_SMALL    = "[ャュョッー]";
    $RE_SYLLABLE = "(?:.$RE_SMALL?)";
    $RE_NBAR     = "^ンー";

    # Values read from YAML may be raw octets or already-decoded characters
    # depending on the YAML implementation; decode only what is still octets.
    my $to_characters = sub {
        my ($value) = @_;
        return $value if !defined $value || Encode::is_utf8($value);
        return decode_utf8($value);
    };

    # Flatten the configured [word, replacement] pairs into one flat list.
    @DEFAULT_WORDS = map { 
        ( $to_characters->($_->[0]), $to_characters->($_->[1]) )
    } @{ $config->{custom_words} || [] };
}

# Build the alternation string that translate() installs as $RE_EXCEPTION.
#
# custom_words is treated as a flat list of pairs; the second member of each
# pair (the odd indices) is collected and the results are joined with "|".
#
# Returns a character string, not a compiled regex.  The words are inserted
# verbatim (no quotemeta).  An empty or undefined custom_words list yields
# the empty string.
sub _create_exception_re
{
    my $self   = shift;
    my $custom = $self->custom_words || [];

    # Select odd indices only.  0 .. $#{$custom} is empty for an empty list
    # and never runs past the end of an odd-length list, so no undef element
    # (and no stray empty alternative) can sneak into the pattern.
    my @alternatives =
        grep { defined }
        map  { $custom->[$_] }
        grep { $_ % 2 } 0 .. $#{$custom};

    # Words may already be character strings (the default words are decoded
    # when the module loads).  Decoding those again makes current Encode
    # croak with "Cannot decode string with wide characters", so only octet
    # strings are decoded here.
    return join('|',
        map { Encode::is_utf8($_) ? $_ : decode_utf8($_) } @alternatives);
}

sub translate
{
    my $self   = shift;
    my $string = decode_utf8(shift);

    if (! ref $self) {
        $self = $self->new({ custom_words => \@DEFAULT_WORDS, @_ });
    }

    # Create local RE_EXCEPTION
    local $RE_EXCEPTION = $self->_create_exception_re;

    $self->preprocess(\$string);
    $self->runthrough(\$string);

lib/Acme/Shukugawa/Atom.pm  view on Meta::CPAN

        }

        if ($text !~ /\S/) {
            $ret .= $text;
            next;
        }

        foreach (my $node = $mecab->parse($text); $node; $node = $node->next) {
            next unless $node->surface;

            my $surface = decode_utf8($node->surface);
            my $feature = decode_utf8($node->feature);
            my ($type, $yomi) = (split(/,/, $feature))[0,8];
# warn "$surface -> $type, $yomi";

            if ($surface eq '上手') {
                $ret .= 'マイウー';
                next;
            }

            if ($type eq '動詞' && $node->next) {
                # 助動詞を計算に入れる
                my $next_feature = decode_utf8($node->next->feature);
                my ($next_type, $next_yomi) = (split(/,/, $next_feature))[0,8];
                if ($next_type eq '助動詞') {
                    $yomi .= $next_yomi;
                    $node = $node->next;
                }
            }

            if ($type =~ /副詞|助動詞|形容詞|接続詞|助詞/ && $surface =~ /^\p{InHiragana}+$/) {
                $ret .= $surface;
            } elsif ($yomi) {

t/01_basic.t  view on Meta::CPAN

use Test::Base;
use utf8;

plan tests => 1 + 1 * blocks;

use_ok("Acme::Shukugawa::Atom");


# Helper for the test blocks: hands its first argument to the module's
# class-method translate() and returns whatever that call returns.
sub translate {
    my $input = shift;
    return Acme::Shukugawa::Atom->translate($input);
}

t/02_shisu.t  view on Meta::CPAN

use Test::Base;
use utf8;

plan tests => 1 + 1 * blocks;

use_ok("Acme::Shukugawa::Atom");


# Helper for the test blocks: hands its first argument to the module's
# class-method translate() and returns whatever that call returns.
sub translate {
    my $input = shift;
    return Acme::Shukugawa::Atom->translate($input);
}

t/04_kuribitsu.t  view on Meta::CPAN

use Test::Base;
use utf8;

plan tests => 1 + 1 * blocks;

use_ok("Acme::Shukugawa::Atom");


# Helper for the test blocks: hands its first argument to the module's
# class-method translate() and returns whatever that call returns.
sub translate {
    my $input = shift;
    return Acme::Shukugawa::Atom->translate($input);
}



( run in 1.087 second using v1.01-cache-2.11-cpan-49f99fa48dc )