Acme-Nyaa

 view release on metacpan or  search on metacpan

Build.PL  view on Meta::CPAN

# =========================================================================
# THIS FILE IS AUTOMATICALLY GENERATED BY MINILLA.
# DO NOT EDIT DIRECTLY.
# =========================================================================

use 5.008_001;

use strict;
use warnings;
use utf8;

use Module::Build;
use File::Basename;
use File::Spec;
use CPAN::Meta;
use CPAN::Meta::Prereqs;

my %args = (
    license              => 'perl',
    dynamic_config       => 0,

bin/nyaa  view on Meta::CPAN

#!/usr/bin/env perl
# $Id: nyaa.PL,v 1.2 2011/02/11 10:20:26 ak Exp $
use strict;
use warnings;
use utf8;
use Acme::Nyaa;
use IO::File;
use Encode;
use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat);
use File::Basename qw(basename);

BEGIN {
    my $modulelist = [ 'Acme::Nyaa' ];

    if( defined $ARGV[0] ) {

lib/Acme/Nyaa.pm  view on Meta::CPAN

package Acme::Nyaa;
use strict;
use warnings;
use utf8;
use 5.010001;
use Encode;
use Module::Load;

use version; our $VERSION = qv('0.0.10');
my $Default = 'ja';

sub new {
    # Constructor of Acme::Nyaa
    my $class = shift;
    my $argvs = { @_ };

    return $class if ref $class eq __PACKAGE__;
    $argvs->{'objects'} = [];
    $argvs->{'language'} ||= $Default;
    $argvs->{'loaded-languages'} = [];
    $argvs->{'objectid'} = int rand 2**24;
    $argvs->{'encoding'} = q();
    $argvs->{'utf8flag'} = undef;

    my $nyaan = bless $argvs, __PACKAGE__;
    my $klass = $nyaan->loadmodule( $argvs->{'language'} );
    my $this1 = $nyaan->findobject( $klass, 1 );

    $nyaan->{'subclass'} = $klass;
    return $nyaan;
}

sub subclass {

lib/Acme/Nyaa.pm  view on Meta::CPAN

    push @$objs, $this;
    return $this;
}

sub reckon {
    # Implement at sub class
    my $self = shift;
    return $self->{'encoding'};
}

sub toutf8 {
    my $self = shift;
    my $argv = shift;
    my $text = undef;

    $text = ref $argv ? $$argv : $argv;
    return $text unless length $text;

    $self->reckon( \$text );
    return $text if $self->{'utf8flag'};
    return $text unless $self->{'encoding'};

    if( not $self->{'encoding'} =~ m/(?:ascii|utf8)/ ) {
        Encode::from_to( $text, $self->{'encoding'}, 'utf8' );
    }

    $text = Encode::decode_utf8 $text unless utf8::is_utf8 $text;
    return $text;
}

sub utf8to {
    my $self = shift;
    my $argv = shift;
    my $text = undef;

    $text = ref $argv ? $$argv : $argv;
    return $text unless $self->{'encoding'};
    return $text unless length $text;

    $text = Encode::encode_utf8 $text if utf8::is_utf8 $text;
    if( $self->{'encoding'} ne 'utf8' ) {
        Encode::from_to( $text, 'utf8', $self->{'encoding'} );
    }

    return $text;
}

1;
__END__

=encoding utf8

=head1 NAME

Acme::Nyaa - Convert texts like which a cat is talking in Japanese

=head1 SYNOPSIS

    use Acme::Nyaa;
    my $kijitora = Acme::Nyaa->new;

lib/Acme/Nyaa/Ja.pm  view on Meta::CPAN

package Acme::Nyaa::Ja;
use parent 'Acme::Nyaa';
use strict;
use warnings;
use utf8;

my $RxComma = qr/[、(?:, )]/;
my $RxPeriod = qr/[。!]/;
my $RxEndOfList = qr#[))-=+|}>>/:;"'`\]]#;
my $RxConversation = qr/[「『].+[」』]/;
my $RxEndOfSentence = qr/(?:[!!??…]+|[.]{2,}|[。]{2,}|[、]{2,}|[,]{2,})/;

my $Cats = [ '猫', 'ネコ', 'ねこ' ];
my $Separator = qq(\x1f\x1f\x1f);
my $HiraganaNya = 'にゃ';

lib/Acme/Nyaa/Ja.pm  view on Meta::CPAN

    my $text = undef;
    my $neko = undef;
    my $nyaa = undef;

    return q() if( $ref1 ne '' && $ref1 ne 'SCALAR' );
    $text = $ref1 eq 'SCALAR' ? $$argv: $argv;
    return q() unless length $text;

    eval { 
        $self->reckon( \$text );
        $neko = $self->toutf8( $text );
    };
    return $text if $@;

    $neko =~ s{($RxPeriod)}{$1$Separator}g;
    $neko .= $Separator unless $neko =~ m{$Separator};

    my $hiralength = scalar @$HiraganaTails;
    my $katalength = scalar @$KatakanaTails;
    my $writingset = [ split( $Separator, $neko ) ];
    my $haschomped = 0;

lib/Acme/Nyaa/Ja.pm  view on Meta::CPAN

                    $e =~ s/(\s*?)\z/$KatakanaTails->[ $r1 ]$1/;
                }
            }
        }

        $e =~ s/[!]$RxPeriod/! /g;
        $e .= qq(\n) if $haschomped;

    } # End of for(@$writingset)

    return $self->utf8to( join( '', @$writingset ) ) unless $flag;
    return join( '', @$writingset );
}

sub neko {
    my $self = shift;
    my $argv = shift;
    my $flag = shift // 0;

    my $ref1 = ref $argv;
    my $text = undef;
    my $neko = undef;

    return q() if( $ref1 ne '' && $ref1 ne 'SCALAR' );
    $text = $ref1 eq 'SCALAR' ? $$argv : $argv;
    return q() unless length $text;


    eval { 
        $self->reckon( \$text );
        $neko = $self->toutf8( $text ); 
    };
    return $text if $@;

    my $nounstable = {
        '神' => 'ネコ',
        '神' => 'ネコ',
    };

    for my $e ( keys %$nounstable ) {

lib/Acme/Nyaa/Ja.pm  view on Meta::CPAN

        $neko =~ s{\A[$e](\p{InHiragana})}{$f$1};
        $neko =~ s{\A[$e](\p{InKatakana})}{$f$1};
        $neko =~ s{(\p{InHiragana})[$e](\p{InHiragana})}{$1$f$2}g;
        $neko =~ s{(\p{InHiragana})[$e](\p{InKatakana})}{$1$f$2}g;
        $neko =~ s{(\p{InKatakana})[$e](\p{InKatakana})}{$1$f$2}g;
        $neko =~ s{(\p{InKatakana})[$e](\p{InHiragana})}{$1$f$2}g;
        $neko =~ s{(\p{InHiragana})[$e]($RxPeriod|$RxComma)?\z}{$1$f$2}g;
        $neko =~ s{(\p{InKatakana})[$e]($RxPeriod|$RxComma)?\z}{$1$f$2}g;
    }

    return $self->utf8to( $neko ) unless $flag;
    return $neko;
}

sub nyaa {
    my $self = shift;
    my $argv = shift || q();
    my $text = ref $argv ? $$argv : $argv;
    my $nyaa = [];

    push @$nyaa, @$KatakanaTails, @$HiraganaTails;

lib/Acme/Nyaa/Ja.pm  view on Meta::CPAN

    push @$data, $ref1 eq 'ARRAY' ? @$argv : $$argv;
    return q() unless scalar @$data;

    for my $r ( @$data ) {

        # To be a cat
        if( $r =~ m|[^\x20-\x7e]+| ) {
            # Encode if any multibyte character exsits
            eval { 
                $self->reckon( \$r );
                $nekobuffer .=  $self->toutf8( $r );
            };
            next if $@;

        } else {
            $nekobuffer .= $r;
        }

        for my $e ( keys %$entityrmap ) {
            # Convert character entity reference to character itself.
            next unless $nekobuffer =~ m/$e/;

lib/Acme/Nyaa/Ja.pm  view on Meta::CPAN

            $nekobuffer = $self->neko( \$nekobuffer, 1 ) if $nekobuffer =~ m|[^\x20-\x7e]+|;
            $leftbuffer = $self->neko( \$leftbuffer, 1 ) if $leftbuffer =~ m|[^\x20-\x7e]+|;
        }

        $text .= $nekobuffer;
        $nekobuffer  = $leftbuffer;
        $leftbuffer  = q();
    }

    $text .= $nekobuffer if length $nekobuffer;
    return $self->utf8to( $text );
}

sub reckon {
    # Recognize text encoding
    my $self = shift;
    my $argv = shift;

    my $ref1 = ref $argv;
    my $text = $ref1 eq 'SCALAR' ? $$argv: $argv;
    return q() unless length $text;

    use Encode::Guess qw(shiftjis euc-jp 7bit-jis);
    $self->{'utf8flag'} = utf8::is_utf8 $text;

    my $code = Encode::Guess->guess( $text );
    my $name = q();
    return q() unless ref $code;

    # What encoding
    $name = $code->name;
    $name = $1 if $name =~ m/\A(.+) or .+/;

    if( $name ne 'ascii' ) {
        $self->{'encoding'} ||= $name;
    }
    return $self->{'encoding'};
}

1;

__END__
=encoding utf8

=head1 NAME

Acme::Nyaa - Convert texts like which a cat is talking in Japanese

=head1 SYNOPSIS

    use Acme::Nyaa::Ja;
    my $kijitora = Acme::Nyaa::Ja->new();

t/10_acme-nyaa.t  view on Meta::CPAN

use strict;
use warnings;
use lib './lib';
use utf8;
use Test::More;

BEGIN { use_ok 'Acme::Nyaa' }

my $kijitora = Acme::Nyaa->new;
my $language = [ 'ja' ];
my $cmethods = [ 'new' ];
my $imethods = [ 
    'cat', 'neko', 'nyaa', 'straycat',
    'loadmodule', 'findobject', 'objects', 'subclass',

t/11_acme-nyaa-ja.t  view on Meta::CPAN

use strict;
use warnings;
use utf8;
use lib './lib';
use Test::More;
use Encode;

BEGIN { 
    use_ok 'Acme::Nyaa';
    use_ok 'Acme::Nyaa::Ja';
}

sub e { 
    my $text = shift; 
    my $char = shift || q();

    Encode::from_to( $text, $char, 'utf8' ) if $char;
    utf8::encode $text if utf8::is_utf8 $text;
    return $text;   
}

my $nekotext = 't/cat-related-text.ja.txt';
my $textlist = [];
my $langlist = [ qw|af ar de el en es fa fi fr he hi id is la pt ru th tr zh| ];
my $encoding = [ qw|euc-jp 7bit-jis shiftjis| ];
my $cmethods = [ 'new', 'reckon', 'toutf8', 'utf8to' ];
my $imethods = [ 
    'cat', 'neko', 'nyaa', 'straycat',
    'language', 'findobject', 'objects', 'object',
];
my $sabatora = undef;

$sabatora = Acme::Nyaa->new( 'language' => 'ja' );
isa_ok( $sabatora, 'Acme::Nyaa' );
is( $sabatora->language, 'ja', '->language() = ja' );

$sabatora = Acme::Nyaa::Ja->new;
isa_ok( $sabatora, 'Acme::Nyaa::Ja' );
isa_ok( $sabatora->new, 'Acme::Nyaa::Ja' );
isa_ok( $sabatora->object, 'Acme::Nyaa::Ja' );
isa_ok( $sabatora->objects, 'Acme::Nyaa::Ja' );
isa_ok( $sabatora->findobject, 'Acme::Nyaa::Ja' );
is( $sabatora->language, 'ja', '->language() = ja' );
is( $sabatora->reckon( '猫' ), 'utf8', '->reckon() = utf8' );

can_ok( 'Acme::Nyaa::Ja', @$cmethods );
can_ok( 'Acme::Nyaa::Ja', @$imethods );

ok( -T $nekotext, sprintf( "%s is textfile", $nekotext ) );
ok( -r $nekotext, sprintf( "%s is readable", $nekotext ) );

open( my $fh, '<', $nekotext ) || die 'cannot open '.$nekotext;
$textlist = [ <$fh> ];
ok( scalar @$textlist, sprintf( "%s have %d lines", $nekotext, scalar @$textlist ) );
close $fh;

foreach my $f ( 0, 1 ) {

    foreach my $u ( @$textlist ) {

        my $label = $f ? '->cat(utf8-flagged)' : '->cat(utf8)';
        my ($text0, $text1, $text2, $text3, $text4);
        my ($size0, $size1, $size2, $size3, $size4);

        $text0 = $u; chomp $text0;
        utf8::decode $text0 if $f;

        $text1 = $sabatora->cat( \$text0 );
        utf8::decode $text0 unless utf8::is_utf8 $text0;
        $size0 = length $text0;
        $size1 = length $text1;

        ok( $size1 >= $size0, 
            sprintf( "[1] %s: %s(%d) => %s(%d)", $label, 
                    e($text0), $size0, 
                    e($text1), $size1) );

        $text2 = $sabatora->cat( \$text1 );
        utf8::decode $text1  unless utf8::is_utf8 $text1;
        $size1 = length $text1;
        $size2 = length $text2;
        ok( $size2 >= $size1, sprintf( "[2] %s", $label ) );

        $label = $f ? '->neko(utf8-flagged)' : '->neko(utf8)';
        $text3 = $sabatora->neko( \$text0 );
        utf8::decode $text0 unless utf8::is_utf8 $text0;
        $size0 = length $text0;
        $size3 = length $text3;

        ok( $size3 >= $size0, 
            sprintf( "[1] %s: %s(%d) => %s(%d)", $label, 
                    e($text0), $size0,
                    e($text3), $size3 ) );

        $text4 = $sabatora->neko( \$text3 );
        is( $text4, $text3, sprintf( "[2] %s", $label ) );

t/11_acme-nyaa-ja.t  view on Meta::CPAN


    foreach my $t ( @$textlist ) {

        next unless length $t > 100;
        my $label = sprintf( "->cat(%s)", $e );
        my $guess = undef;
        my ($text0, $text1, $text2, $text3, $text4);
        my ($size0, $size1, $size2, $size3, $size4);

        $text0 = $t; chomp $text0;
        Encode::from_to( $text0, 'utf8', $e );
        $guess = Encode::Guess->guess( $text0 );

        ok( ref $guess, ref $guess );
        ok( $guess->name, $guess->name );
        like( $guess->name, qr/$e/, $e );

        $text1 = $sabatora->cat( \$text0 );
        Encode::from_to( $text0, $e, 'utf8' );
        utf8::decode $text0 unless utf8::is_utf8 $text0;

        ok( length $text1 >= length $text0, sprintf( "[2] %s", $label ) );

        $text2 = $sabatora->cat( \$text1 );
        ok( length $text2 >= length $text1, sprintf( "[2] %s", $label ) );

        $label = sprintf( "->neko(%s)", $e );
        $text3 = $sabatora->neko( \$text0 );
        ok( length $text3 >= length $text0, sprintf( "[2] %s", $label ) ); 

t/11_acme-nyaa-ja.t  view on Meta::CPAN

open( $fh, '<', $nekotext ) || die 'cannot open '.$nekotext;
$textlist = [ <$fh> ];
ok( scalar @$textlist, 
    sprintf( "%s have %d lines", $nekotext, scalar @$textlist ) );
close $fh;

my $text0 = join( '', @$textlist );
my $text1 = $sabatora->straycat( $textlist );
my $text2 = $sabatora->straycat( \$text0 );

utf8::decode $text0 unless utf8::is_utf8 $text0;

ok( length( $text1 ) > length( $text0 ) );
ok( length( $text2 ) > length( $text0 ) );

done_testing();
__END__



( run in 1.468 second using v1.01-cache-2.11-cpan-49f99fa48dc )