Acme-Samurai
view release on metacpan or search on metacpan
lib/Acme/Samurai.pm view on Meta::CPAN
package Acme::Samurai;
use 5.010001;
use strict;
use warnings;
use utf8;
our $VERSION = '0.04';
use File::ShareDir qw/dist_file/;
use Lingua::JA::Alphabet::Yomi qw/alphabet2yomi/;
use Lingua::JA::Numbers qw/num2ja/;
use Unicode::Japanese qw/unijp/;
use Text::Mecabist;
# gozaru( $text )
#
# Public entry point, called as a class method. Blesses an empty hash into
# the invocant, parses $text with Text::Mecabist (loading the user
# dictionary from this distribution's share directory), hands every parsed
# node to apply_rules(), and returns the result of finalize() on the parsed
# document. An undefined $text is treated as the empty string.
sub gozaru {
    my ($class, $input) = @_;
    my $self = bless {}, $class;
    $input //= "";

    # The same format string is passed for all four *_format options.
    my $format = '%m,%H';

    # The dictionary file is named after the encoding Text::Mecabist reports.
    my $dic_name = Text::Mecabist->encoding->name . '.dic';

    my $parser = Text::Mecabist->new({
        node_format => $format,
        unk_format  => $format,
        bos_format  => $format,
        eos_format  => $format,
        userdic     => dist_file('Acme-Samurai', $dic_name),
    });

    # Unicode::Japanese pass: z2hNum then h2zAlpha, fetched with getu
    # (presumably full-width digits -> half-width and half-width letters ->
    # full-width, returned as a Perl character string -- confirm against the
    # Unicode::Japanese documentation).
    my $normalized = unijp($input)->z2hNum->h2zAlpha->getu;

    # The callback receives each node and lets apply_rules() rewrite it.
    my $doc = $parser->parse($normalized, sub { $self->apply_rules($_[0]) });

    return $self->finalize($doc);
}
sub apply_rules {
my ($self, $node) = @_;
return if not $node->readable;
my $text = $node->text;
# one to one custom dictionary
if ($node->extra) {
$text = $node->extra;
}
if ($node->is('åè©') or $node->is('è¨å·')) {
# arabic number to kanji
if ($node->pos1 eq 'æ°' and $node->surface =~ /^[0-9]+$/) {
# no ä½
if ($node->surface =~ /^0/ or
$node->prev && $node->prev->surface =~ /[.ï¼]/) {
$text = join "", map { num2ja($_) } split //, $node->surface;
} else {
$text = num2ja($node->surface); # with ä½
}
}
# kanji number to more classic
elsif ($node->pos1 eq 'æ°') {
lib/Acme/Samurai.pm view on Meta::CPAN
$node->prev->surface eq 'ã®') {
$text = 'ã ãª';
}
}
if ($node->is('å©åè©')) {
if ($text eq 'ãªã') {
if ($node->prev and
$node->prev->surface eq 'ã' and
$node->next and
$node->next->surface and
$node->next->pos !~ /è©/) {
$node->prev->skip(1);
$text = 'ãã¬';
}
if ($node->prev and
$node->prev->surface ne 'ã' and
$node->prev->inflection_form eq 'æªç¶å½¢') {
$text = 'ã¬';
}
}
elsif ($text eq 'ãªãã') {
if ($node->prev and
$node->prev->surface eq 'ã') {
$node->prev->skip(1);
$text = 'ãã';
}
}
}
if ($node->is('æåè©')) {
if ($node->next and
$node->next->pos !~ /è©/) {
$text = $node->extra if $node->extra;
$text .= 'ã§ããã';
}
}
$node->text($text);
}
# finalize( $doc )
#
# Post-processes a parsed document: obtains a single string via
# $doc->join('text'), applies two global phrase-level substitutions to it,
# and returns the resulting string.
#
# NOTE(review): the non-ASCII literals in both patterns look mis-encoded
# (UTF-8 bytes rendered as Latin-1); verify them against the upstream file
# before relying on them. They are kept exactly as found here.
sub finalize {
    my ($self, $doc) = @_;

    my $joined = $doc->join('text');

    for ($joined) {
        s/(?:ã¦ããã¾ãã|ã¦ãã¾ãã?)\b/ã¦ããå/g;
        s/(?:ã©ãã)?ãããããªã(?:ãããã¾(?:ãã|ã|ãã))?/ãããããªã/g;
    }

    return $joined;
}
1;
__END__
=encoding utf-8
=head1 NAME
Acme::Samurai - Speak like a Samurai
=head1 SYNOPSIS
use utf8;
use Acme::Samurai;
Acme::Samurai->gozaru("私、侍です"); # => "それがし、侍でござる"
=head1 DESCRIPTION
Translates Japanese to 時代劇
(L<http://en.wikipedia.org/wiki/Jidaigeki>) speak.
Test form: L<http://samurai.koneta.org/>
=head1 METHODS
=over 4
=item gozaru( $text )
=back
=head1 AUTHOR
Naoki Tomita E<lt>tomita@cpan.orgE<gt>
=head1 SPECIAL THANKS
kazina, this module started from てきすたー dictionary.
L<http://kazina.com/texter/index.html>
and Hiroko Nagashima, Shin Yamauchi for additional samurai vocabulary.
=head1 LICENSE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=for stopwords hiroko nagashima shin yamauchi de gozaru kazina
=cut
( run in 2.439 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )