Acme-Nyaa
view release on metacpan or search on metacpan
lib/Acme/Nyaa/Ja.pm view on Meta::CPAN
package Acme::Nyaa::Ja;
use parent 'Acme::Nyaa';
use strict;
use warnings;
use utf8;
my $RxComma = qr/[ã(?:, )]/;
my $RxPeriod = qr/[ãï¼]/;
my $RxEndOfList = qr#[ï¼)-=+|}ï¼>/:;"'`\]]#;
my $RxConversation = qr/[ãã].+[ãã]/;
my $RxEndOfSentence = qr/(?:[!ï¼?ï¼â¦]+|[.]{2,}|[ã]{2,}|[ã]{2,}|[,]{2,})/;
my $Cats = [ 'ç«', 'ãã³', 'ãã' ];
my $Separator = qq(\x1f\x1f\x1f);
my $HiraganaNya = 'ã«ã';
my $KatakanaNya = 'ãã£';
my $FightingCats = [
'ããã¼ãªã',
'ããã¼ãª!ã',
'ããã¼ã¼ãªã',
'ããã¼ã¼ãª!ã',
'ããã¼ã¼ã¼ãª!!ã',
'ããã¼ã¼ã¼ã¼ãª!!!ã',
];
my $Copulae = [ 'ã ', 'ã§ã', 'ã§ãã', 'ã©ã', 'ãããããªã', 'ããã', 'ããã§ã' ];
my $HiraganaTails = [
'ã«ã', 'ã«ãã¼', 'ã«ãã', 'ã«ãã¼ã¼ã¼ã¼!', 'ã«ãã', 'ã«ãã¼ã', 'ã«ããã',
'ã«ãã¼!', 'ã«ãã¼ã¼ã¼!!', 'ã«ãã¼ã¼!',
];
my $KatakanaTails = [
'ãã£', 'ãã£ã¼', 'ãã£ã', 'ãã£ã¼ã¼ã¼ã¼!', 'ãã£ã', 'ãã£ã¼ã', 'ãã£ãã',
'ãã£ã¼!', 'ãã£ã¼ã¼ã¼!!', 'ãã£ã¼ã¼!',
];
my $DoNotBecomeCat = [
# See http://ja.wikipedia.org/wiki/ã¢ã¼ãã³ã°å¨ã
'ã¢ã¼ãã³ã°å¨ã',
'ã«ã³ããªã¼å¨ã',
'ã³ã³ãããå¨ã',
'ããã¢ãã',
'ã¨ã³ã¢ãã',
'ããã¼!ã¢ã¼ãã³ã°ã',
'ã¨ã¢ã¢ãã',
'ã¢ã¼ãã³ã°åäºã',
'ã¢ã¼å¨ã',
];
sub new {
# Constructor
my $class = shift;
my $argvs = { @_ };
return $class if ref $class eq __PACKAGE__;
$argvs->{'language'} = 'ja';
return bless $argvs, __PACKAGE__;
}
sub language {
# Set language to use
my $self = shift;
$self->{'language'} ||= 'ja';
return $self->{'language'};
}
sub object {
# Wrapper method for new()
my $self = shift;
return __PACKAGE__->new unless ref $self;
return $self;
}
*objects = *object;
*findobject = *object;
sub cat {
my $self = shift;
my $argv = shift;
my $flag = shift // 0;
my $ref1 = ref $argv;
my $text = undef;
my $neko = undef;
my $nyaa = undef;
return q() if( $ref1 ne '' && $ref1 ne 'SCALAR' );
$text = $ref1 eq 'SCALAR' ? $$argv: $argv;
return q() unless length $text;
eval {
$self->reckon( \$text );
$neko = $self->toutf8( $text );
};
return $text if $@;
$neko =~ s{($RxPeriod)}{$1$Separator}g;
$neko .= $Separator unless $neko =~ m{$Separator};
my $hiralength = scalar @$HiraganaTails;
my $katalength = scalar @$KatakanaTails;
my $writingset = [ split( $Separator, $neko ) ];
my $haschomped = 0;
my ( $r1,$r2 ) = 0;
for my $e ( @$writingset ) {
next if $e =~ m/\A$RxPeriod\s*\z/;
next if $e =~ m/$RxEndOfList\s*\z/;
next if grep { $e =~ m/\A$_\s*/ } @$DoNotBecomeCat;
next if grep { $e =~ m/$_$RxPeriod?\z/ } @$HiraganaTails;
next if grep { $e =~ m/$_$RxPeriod?\z/ } @$KatakanaTails;
next if grep { $e =~ m/$_$RxEndOfSentence?\s*\z/ } @$HiraganaTails;
next if grep { $e =~ m/$_$RxEndOfSentence?\s*\z/ } @$KatakanaTails;
next if grep { $e =~ m/$_\s*\z/ } @$FightingCats;
# Do not convert if the string contain only ASCII characters.
# ASCIIæåããå
¥ã£ã¦ãªãæã¯ä½ãããªã
next if $e =~ m{\A[\x20-\x7E]+\z};
# ã²ãããªãã¾ãã¯ã«ã¿ã«ããå
¥ã£ã¦ãªããªã次ã¸
next unless $e =~ m{[\p{InHiragana}\p{InKatakana}]+};
# Cats may be hard to speak a word which ends with a character 'ã'.
# ãããã®å¾ãã«ãã£ã¼ãããã¨ç«ãåãã«ããã
next if $e =~ m{[ãã]$RxPeriod?\s*\z};
$haschomped = chomp $e;
if( $e =~ m/ãª$RxPeriod?\s*\z/ ) {
# 㪠=> ã«ãã¼
$e =~ s/ãª($RxPeriod?)(\s*)\z/$HiraganaNya$1$2/;
} elsif( $e =~ m/ã$RxPeriod?\s*\z/ ) {
# ã => ãã£ã¼
$e =~ s/ã($RxPeriod?)(\s*)\z/$HiraganaNya$1$2/;
} elsif( $e =~ m/\p{InHiragana}$RxPeriod\s*\z/ ) {
$r1 = int rand $katalength;
$e =~ s/($RxPeriod)(\s*)\z/$KatakanaTails->[ $r1 ]$1$2/;
} elsif( $e =~ m/\p{InKatakana}$RxPeriod\s*\z/ ) {
$r1 = int rand $hiralength;
$e =~ s/($RxPeriod)(\s*)\z/$HiraganaTails->[ $r1 ]$1$2/;
} elsif( $e =~ m/\p{InCJKUnifiedIdeographs}$RxPeriod?\s*\z/ ) {
$r1 = int rand $hiralength;
$r2 = int rand scalar @$Copulae;
$e =~ s/($RxPeriod?)(\s*)\z/$Copulae->[ $r2 ]$KatakanaTails->[ $r1 ]$1$2/;
} else {
if( $e =~ m/($RxEndOfSentence)\s*\z/ ) {
# ... => ãã£ã¼..., ! => ãã£!
my $eos = $1;
if( $e =~ m/\p{InKatakana}$RxEndOfSentence\s*\z/ ) {
$r1 = int rand( $hiralength / 2 );
$e =~ s/$RxEndOfSentence/$HiraganaTails->[ $r1 ]$eos/g;
} elsif( $e =~ m/\p{InHiragana}$RxEndOfSentence\s*\z/ ) {
$r1 = int rand( $katalength / 2 );
$e =~ s/$RxEndOfSentence/$KatakanaTails->[ $r1 ]$eos/g;
} else {
$r1 = int rand( $katalength / 2 );
$r2 = int rand( scalar @$Copulae );
$e =~ s/$RxEndOfSentence/$Copulae->[ $r2 ]$KatakanaTails->[ $r1 ]$eos/g;
}
} elsif( $e =~ m/$RxConversation\s*\z/ ) {
# 0.5ã®ç¢ºçã§ä¼è©±ã®å¾ãã§ç«ãå§å©ããã
if( $e =~ m/\A(.*$RxConversation[ ]*)($RxConversation.*)\s*\z/ ) {
$r1 = int rand scalar @$FightingCats;
$e = $1.$FightingCats->[ $r1 ].$2 if int(rand(10)) % 2;
}
$r1 = int rand scalar @$FightingCats;
$e .= $FightingCats->[ $r1 ] if int(rand(10)) % 2;
} else {
$r1 = int rand $katalength;
if( $e =~ m/[0-9\p{Latin}]\s*\z/ ) {
$r2 = int rand scalar @$Copulae;
$e =~ s/(\s*?)\z/ $Copulae->[ $r2 ]$KatakanaTails->[ $r1 ]$1/;
} elsif( $e =~ m/\p{InKatakana}\s*\z/ ) {
$e =~ s/(\s*?)\z/$HiraganaTails->[ $r1 ]$1/;
} else {
$e =~ s/(\s*?)\z/$KatakanaTails->[ $r1 ]$1/;
}
}
}
$e =~ s/[!]$RxPeriod/! /g;
$e .= qq(\n) if $haschomped;
} # End of for(@$writingset)
return $self->utf8to( join( '', @$writingset ) ) unless $flag;
return join( '', @$writingset );
}
sub neko {
my $self = shift;
my $argv = shift;
my $flag = shift // 0;
my $ref1 = ref $argv;
my $text = undef;
my $neko = undef;
return q() if( $ref1 ne '' && $ref1 ne 'SCALAR' );
$text = $ref1 eq 'SCALAR' ? $$argv : $argv;
return q() unless length $text;
eval {
$self->reckon( \$text );
$neko = $self->toutf8( $text );
};
return $text if $@;
my $nounstable = {
'ç¥' => 'ãã³',
'ï¨' => 'ãã³',
};
for my $e ( keys %$nounstable ) {
next unless $neko =~ m{$e};
my $f = $nounstable->{ $e };
$neko =~ s{\A[$e]\z}{$f};
$neko =~ s{\A[$e](\p{InHiragana})}{$f$1};
$neko =~ s{\A[$e](\p{InKatakana})}{$f$1};
$neko =~ s{(\p{InHiragana})[$e](\p{InHiragana})}{$1$f$2}g;
$neko =~ s{(\p{InHiragana})[$e](\p{InKatakana})}{$1$f$2}g;
$neko =~ s{(\p{InKatakana})[$e](\p{InKatakana})}{$1$f$2}g;
$neko =~ s{(\p{InKatakana})[$e](\p{InHiragana})}{$1$f$2}g;
$neko =~ s{(\p{InHiragana})[$e]($RxPeriod|$RxComma)?\z}{$1$f$2}g;
$neko =~ s{(\p{InKatakana})[$e]($RxPeriod|$RxComma)?\z}{$1$f$2}g;
}
return $self->utf8to( $neko ) unless $flag;
return $neko;
}
sub nyaa {
my $self = shift;
my $argv = shift || q();
my $text = ref $argv ? $$argv : $argv;
my $nyaa = [];
push @$nyaa, @$KatakanaTails, @$HiraganaTails;
return $text.$nyaa->[ int rand( scalar @$nyaa ) ];
}
sub straycat {
my $self = shift;
my $argv = shift // return q();
my $noun = shift // 0;
my $ref1 = ref $argv;
my $data = [];
my $text = q();
my $nekobuffer = q();
my $leftbuffer = q();
my $buffersize = 144;
my $entityrmap = {
'、' => 'ã',
'。' => 'ã',
};
return q() unless $ref1 =~ m/(?:ARRAY|SCALAR)/;
push @$data, $ref1 eq 'ARRAY' ? @$argv : $$argv;
return q() unless scalar @$data;
for my $r ( @$data ) {
# To be a cat
if( $r =~ m|[^\x20-\x7e]+| ) {
# Encode if any multibyte character exsits
eval {
$self->reckon( \$r );
$nekobuffer .= $self->toutf8( $r );
};
next if $@;
} else {
$nekobuffer .= $r;
}
for my $e ( keys %$entityrmap ) {
# Convert character entity reference to character itself.
next unless $nekobuffer =~ m/$e/;
$nekobuffer =~ s/$e/$entityrmap->{ $e }/g;
}
if( length $nekobuffer < $buffersize ) {
if( $nekobuffer =~ m/(.+$RxPeriod)(.*)/msx ) {
$nekobuffer = $1;
$leftbuffer = $2;
} else {
next;
}
}
if( $nekobuffer =~ m|[^\x20-\x7e]+| ) {
# Convert if any multibyte character exsits
$nekobuffer = $self->cat( \$nekobuffer, 1 );
}
if( $noun ) {
# Convert noun
$nekobuffer = $self->neko( \$nekobuffer, 1 ) if $nekobuffer =~ m|[^\x20-\x7e]+|;
$leftbuffer = $self->neko( \$leftbuffer, 1 ) if $leftbuffer =~ m|[^\x20-\x7e]+|;
}
$text .= $nekobuffer;
$nekobuffer = $leftbuffer;
$leftbuffer = q();
}
$text .= $nekobuffer if length $nekobuffer;
return $self->utf8to( $text );
}
sub reckon {
# Recognize text encoding
my $self = shift;
my $argv = shift;
my $ref1 = ref $argv;
my $text = $ref1 eq 'SCALAR' ? $$argv: $argv;
return q() unless length $text;
use Encode::Guess qw(shiftjis euc-jp 7bit-jis);
$self->{'utf8flag'} = utf8::is_utf8 $text;
my $code = Encode::Guess->guess( $text );
my $name = q();
return q() unless ref $code;
# What encoding
$name = $code->name;
$name = $1 if $name =~ m/\A(.+) or .+/;
if( $name ne 'ascii' ) {
$self->{'encoding'} ||= $name;
}
return $self->{'encoding'};
}
1;
__END__
=encoding utf8
=head1 NAME
Acme::Nyaa - Convert texts like which a cat is talking in Japanese
=head1 SYNOPSIS
use Acme::Nyaa::Ja;
my $kijitora = Acme::Nyaa::Ja->new();
# the following code is equivalent to the above.
use Acme::Nyaa;
my $kijitora = Acme::Nyaa->new( 'language' => 'ja' );
print $kijitora->cat( \'ç«ãããããã' ); # => ç«ããããããã£ã¼ã
print $kijitora->neko( \'ç¥ã¨åè§£ãã' ); # => ãã³ã¨åè§£ãã
=head1 DESCRIPTION
Acme::Nyaa is a converter which translate Japanese texts to texts like which a cat talking.
Language modules are available only Japanese (L<Acme::Nyaa::Ja>) for now.
=head1 CLASS METHODS
=head2 B<new()>
new() is a constructor of Acme::Nyaa::Ja
my $kijitora = Acme::Nyaa::Ja->new();
my $sabatora = Acme::Nyaa->new( 'language' => 'ja' );
=head1 INSTANCE METHODS
=head2 B<cat( I<\$text> )>
cat() is a converter that appends string C<ãã£ã¼> at the end of each sentence.
my $kijitora = Acme::Nyaa::Ja->new;
my $nekotext = 'ç«ãããããã';
print $kijitora->cat( \$nekotext );
# ç«ããããããã£ã¼ã¼!!
=head2 B<neko( I<\$text> )>
neko() is a converter that replace a noun with C<ãã³>.
my $kijitora = Acme::Nyaa::Ja->new;
my $nekotext = '人ã®éãè¡ããç¥ã¯è¦ã¦ãã';
print $kijitora->neko( \$nekotext );
# 人ã®éãè¡ãããã³ã¯è¦ã¦ãã
=head2 B<nyaa( [I<\$text>] )>
nyaa() returns string: C<ãã£ã¼>.
my $kijitora = Acme::Nyaa->new;
print $kijitora->nyaa(); # ãã£ã¼
print $kijitora->nyaa('京é½'); # 京é½ã«ãã¼
( run in 0.661 second using v1.01-cache-2.11-cpan-8f98c5d2c55 )