view release on metacpan or search on metacpan
Build.PL
Changes
LICENSE
META.json
README.md
author/bench.pl
author/bench.txt
author/cp932-utf8.pl
author/cp932-utf8.txt
author/euc2utf8.pl
author/sjis2utf8.pl
cpanfile
lib/AozoraBunko/Checkerkun.pm
minil.toml
share/hiden_no_tare.yml
t/00_compile.t
t/01_basic.t
t/02_options.t
t/03_html_output.t
t/04_kouetsu.t
t/09_duplication.t
# NAME
AozoraBunko::Checkerkun - 青空文庫の工作員のための文字チェッカー（作：結城浩）をライブラリ化したもの
# SYNOPSIS
use AozoraBunko::Checkerkun;
use utf8;
my $checker1 = AozoraBunko::Checkerkun->new;
$checker1->check('森â»ï¼»ï¼ãåï¼é³¥ãã第3æ°´æº1-94-69ï¼½å¤ð'); # => '森â»ï¼»ï¼ãåï¼é³¥ãã第3æ°´æº1-94-69ï¼½â[78hosetsu_tekiyo]ãé´ãå¤ð[gaiji]'
$checker1->check('森鷗外'); # => '森鷗[gaiji]外'
$checker1->check('森鴎外'); # => '森鴎外'
my $checker2 = AozoraBunko::Checkerkun->new({ output_format => 'html', gonin1 => 1, gonin2 => 1, gonin3 => 1 });
$checker2->check('æ¡ãããæ±ãå£ããã'); # => '<span data-checkerkun-tag="gonin3" data-checkerkun-message="ãã¤ã">æ¡</span>ããã<span data-checkerkun-tag="gonin3" data-checkerkun-message="ã¯ãã">æ±</span>ãå£ããã'
my $checker3 = AozoraBunko::Checkerkun->new({ kouetsukun => 1 });
author/bench.pl view on Meta::CPAN
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use Encode qw//;
use Benchmark qw/cmpthese/;
my $ENC = Encode::find_encoding("Shift_JIS");
my $char = 'ð';
cmpthese(-1, {
'is_gaijiA', sub { is_gaijiA($char) },
'is_gaijiB', sub { is_gaijiB($char) },
});
author/cp932-utf8.pl view on Meta::CPAN
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use open OUT => qw/:utf8 :std/;
use feature qw/say/;
use Encode qw//;
my %CP932;
my %SJIS;
my $ENC_CP932 = Encode::find_encoding("CP932");
my $ENC_SJIS = Encode::find_encoding("Shift_JIS");
for my $dec ( hex('0') .. hex('10FFFF') ) # ãµãã²ã¼ãæåãå«ãã§ãã
author/euc2utf8.pl view on Meta::CPAN
#!/usr/bin/env perl
# Usage: ./euc2utf8.pl < input_filename > output_filename
#
# Reads EUC-JP encoded input via the <> operator, decodes it, and prints
# the UTF-8 encoded result to STDOUT.
use strict;
use warnings;
use Encode qw//;

# With $/ undefined, a single <> read slurps instead of stopping at the
# first newline.
my $euc_octets = do { local $/ = undef; <> };

# Both conversions pass Encode::FB_WARN as the CHECK argument.
my $text = Encode::decode('euc-jp', $euc_octets, Encode::FB_WARN);

print Encode::encode('utf-8', $text, Encode::FB_WARN);
author/sjis2utf8.pl view on Meta::CPAN
#!/usr/bin/env perl
# Usage: ./sjis2utf8.pl < input_filename > output_filename
#
# Reads CP932 encoded input via the <> operator, decodes it, and prints
# the UTF-8 encoded result to STDOUT.
use strict;
use warnings;
use Encode qw//;

# With $/ undefined, a single <> read slurps instead of stopping at the
# first newline.
my $sjis_octets = do { local $/ = undef; <> };

# Both conversions pass Encode::FB_WARN as the CHECK argument.
my $text = Encode::decode('cp932', $sjis_octets, Encode::FB_WARN);

print Encode::encode('utf-8', $text, Encode::FB_WARN);
lib/AozoraBunko/Checkerkun.pm view on Meta::CPAN
package AozoraBunko::Checkerkun;
our $VERSION = "0.12";
use 5.008001;
use strict;
use warnings;
use utf8;
use Carp qw//;
use File::ShareDir qw//;
use YAML::Tiny qw//;
use Encode qw//;
use Lingua::JA::Halfwidth::Katakana;
# Path of 'hiden_no_tare.yml' as resolved by File::ShareDir for the
# 'AozoraBunko-Checkerkun' distribution.
my $YAML_FILE = File::ShareDir::dist_file('AozoraBunko-Checkerkun', 'hiden_no_tare.yml');
# Parse the file with YAML::Tiny and keep only its first document.
my $YAML = YAML::Tiny->read($YAML_FILE)->[0];
# Shift_JIS encoding object, looked up once when the module is loaded.
my $ENC = Encode::find_encoding("Shift_JIS");
lib/AozoraBunko/Checkerkun.pm view on Meta::CPAN
=encoding utf-8
=head1 NAME
AozoraBunko::Checkerkun - 青空文庫の工作員のための文字チェッカー（作：結城浩）をライブラリ化したもの
=head1 SYNOPSIS
use AozoraBunko::Checkerkun;
use utf8;
my $checker1 = AozoraBunko::Checkerkun->new;
$checker1->check('森â»ï¼»ï¼ãåï¼é³¥ãã第3æ°´æº1-94-69ï¼½å¤ð'); # => '森â»ï¼»ï¼ãåï¼é³¥ãã第3æ°´æº1-94-69ï¼½â[78hosetsu_tekiyo]ãé´ãå¤ð[gaiji]'
$checker1->check('森鷗外'); # => '森鷗[gaiji]外'
$checker1->check('森鴎外'); # => '森鴎外'
my $checker2 = AozoraBunko::Checkerkun->new({ output_format => 'html', gonin1 => 1, gonin2 => 1, gonin3 => 1 });
$checker2->check('æ¡ãããæ±ãå£ããã'); # => '<span data-checkerkun-tag="gonin3" data-checkerkun-message="ãã¤ã">æ¡</span>ããã<span data-checkerkun-tag="gonin3" data-checkerkun-message="ã¯ãã">æ±</span>ãå£ããã'
my $checker3 = AozoraBunko::Checkerkun->new({ kouetsukun => 1 });
t/01_basic.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Test::More;
use Test::Fatal;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
can_ok('AozoraBunko::Checkerkun', qw/new check/);
my %option = (
'gaiji' => 1, # JISå¤åããã§ãã¯ãã
'hansp' => 1, # åè§ã¹ãã¼ã¹ããã§ãã¯ãã
'hanpar' => 1, # åè§ã«ãã³ããã§ãã¯ãã
'zensp' => 0, # å
¨è§ã¹ãã¼ã¹ããã§ãã¯ãã
'zentilde' => 1, # å
¨è§ãã«ãããã§ãã¯ãã
'78hosetsu_tekiyo' => 1, # 78äºæå
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
t/02_options.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Encode qw//;
use Test::More;
use Test::Fatal;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
my %option = (
'gaiji' => 0, # JISå¤åããã§ãã¯ãã
'hansp' => 0, # åè§ã¹ãã¼ã¹ããã§ãã¯ãã
'hanpar' => 0, # åè§ã«ãã³ããã§ãã¯ãã
'zensp' => 0, # å
¨è§ã¹ãã¼ã¹ããã§ãã¯ãã
'zentilde' => 0, # å
¨è§ãã«ãããã§ãã¯ãã
'78hosetsu_tekiyo' => 0, # 78äºæå
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
'hosetsu_tekiyo' => 0, # å
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
'78' => 0, # 78äºæå
æ29åããã§ãã¯ãã
t/03_html_output.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Test::More;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
my %option = (
'gaiji' => 0, # JISå¤åããã§ãã¯ãã
'hansp' => 0, # åè§ã¹ãã¼ã¹ããã§ãã¯ãã
'hanpar' => 0, # åè§ã«ãã³ããã§ãã¯ãã
'zensp' => 0, # å
¨è§ã¹ãã¼ã¹ããã§ãã¯ãã
'zentilde' => 0, # å
¨è§ãã«ãããã§ãã¯ãã
'78hosetsu_tekiyo' => 0, # 78äºæå
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
'hosetsu_tekiyo' => 0, # å
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
'78' => 0, # 78äºæå
æ29åããã§ãã¯ãã
t/04_kouetsu.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Test::More;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
my %option = (
'gaiji' => 0, # JISå¤åããã§ãã¯ãã
'hansp' => 0, # åè§ã¹ãã¼ã¹ããã§ãã¯ãã
'hanpar' => 0, # åè§ã«ãã³ããã§ãã¯ãã
'zensp' => 0, # å
¨è§ã¹ãã¼ã¹ããã§ãã¯ãã
'78hosetsu_tekiyo' => 0, # 78äºæå
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
'hosetsu_tekiyo' => 0, # å
æã®å¯¾è±¡ã¨ãªãä¸è¦ãªå¤å注è¨ããã§ãã¯ãã
'78' => 0, # 78äºæå
æ29åããã§ãã¯ãã
'jyogai' => 0, # æ°JISæ¼¢åã§å
æè¦æºã®é©ç¨é¤å¤ã¨ãªã104åããã§ãã¯ãã
t/09_duplication.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Test::More;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
subtest 'duplication check for all hiden_no_tare chars' => sub {
plan skip_all => 'duplications are allowed'; # è¤æ°ã®ã¿ã°ãã¤ãããã¨ã§å¯¾å¦ãã
my @key_list = (
keys %{$AozoraBunko::Checkerkun::JYOGAI}
, keys %{$AozoraBunko::Checkerkun::J78}
, keys %{$AozoraBunko::Checkerkun::GONIN1}
, keys %{$AozoraBunko::Checkerkun::GONIN2}
, keys %{$AozoraBunko::Checkerkun::GONIN3}
t/10_control_chars_not_gaiji.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Test::More;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
# Code points used by this test: U+0000-U+001F without U+000A and U+000D,
# followed by U+007F-U+009F.
my @codepoint_list = (
    0x0000 .. 0x0009,
    0x000B .. 0x000C,
    0x000E .. 0x001F,
    0x007F .. 0x009F,
);
subtest 'check gaiji' => sub {
plan skip_all => 'control chars are allowd since they are marked as "ctrl"';
t/20_jis_x_0208.t view on Meta::CPAN
use strict;
use warnings;
use utf8;
use AozoraBunko::Checkerkun;
use Test::More;
binmode Test::More->builder->$_ => ':utf8' for qw/output failure_output todo_output/;
# The literals are 鴎 (U+9D0E) and 鷗 (U+9DD7); the first must not be
# reported as gaiji, the second must be.
subtest 'JIS X 0208-1983' => sub {
    ok( ! AozoraBunko::Checkerkun::_is_gaiji('鴎'), 'U+9D0E is not gaiji' );
    ok(   AozoraBunko::Checkerkun::_is_gaiji('鷗'), 'U+9DD7 is gaiji' );
};
# The literals are 熙 (U+7199) and 凜 (U+51DC); neither may be reported
# as gaiji.
subtest 'JIS X 0208:1990' => sub {
    ok( ! AozoraBunko::Checkerkun::_is_gaiji('熙'), 'U+7199 is not gaiji' );
    ok( ! AozoraBunko::Checkerkun::_is_gaiji('凜'), 'U+51DC is not gaiji' );
};