Aozora2Epub
view release on metacpan or search on metacpan
lib/Aozora2Epub/XHTML.pm view on Meta::CPAN
# Katakana with a handakuten (semi-voiced mark): on Kindle this shows up as two characters -- can anything be done about it?
return if $men == 1 && $ku == 6 && $ten == 88;
# kindle font of these characters are broken.
return if $men == 1 && $ku == 90 && $ten == 61;
return if $men == 2 && $ku == 15 && $ten == 73;
return jisx0213_to_utf8($men, $ku, $ten);
}
# kindle font of these characters are broken.
our %kindle_broken_font_unicode = map { $_ => 1 } (
0x2152,
0x2189,
0x26bd,
0x26be,
0x3244,
);
our %kindle_ok_font_over0xffff = map { $_ => 1 } (
0x20d58, 0x20e97, 0x20ed7, 0x210e4, 0x2124f, 0x2296b,
0x22d07, 0x22e42, 0x22feb, 0x233fe, 0x23cbe, 0x249ad,
0x24e04, 0x24ff2, 0x2546e, 0x2567f, 0x259cc, 0x2688a,
0x279b4, 0x280e9, 0x28e17, 0x29170, 0x2a2b2,
);
sub kindle_unicode_hex2chr {
my $unicode_hex = shift;
my $unicode = hex($unicode_hex);
return if $kindle_broken_font_unicode{$unicode};
# kindle font is almost not avaliable in this range.
return if $unicode > 0xffff && !$kindle_ok_font_over0xffff{$unicode};
return chr($unicode);
}
sub _conv_gaiji_title_author {
my ($unicode, $men, $ku, $ten) = @_;
if ($unicode) {
my $ch = kindle_unicode_hex2chr($unicode);
return $ch if $ch;
return;
}
my $ch = kindle_jis2chr(0+$men, 0+$ku, 0+$ten);
return $ch if $ch;
return;
}
sub conv_gaiji_title_author {
my $s = shift;
lib/Aozora2Epub/XHTML.pm view on Meta::CPAN
$style =~ s/margin-right/margin-bottom/sg;
$div->attr('style', $style);
})
->process('span.notes', sub {
my $span = shift;
my $note = $span->as_text;
return unless $note =~ m{ï¼»ï¼[^\ï¼½]+?ã([^\ï¼½]+)ï¼½};
my $desc = $1;
my $ch = do {
if ($desc =~ /U\+([A-fa-f0-9]+)/) {
kindle_unicode_hex2chr($1);
} elsif ($desc =~ /第\dæ°´æº(\d)-(\d+)-(\d+)/) {
kindle_jis2chr(0+$1, 0+$2, 0+$3);
}
};
return unless $ch;
# find nearest â» and replace it to $ch
my $left = $span->left;
unless ($left->isa('HTML::Element')) {
if ($left =~ s/â»$/$ch/) {
t/gaiji-replace.t view on Meta::CPAN
use utf8;
use Test::More;
use Test::Base;
use Aozora2Epub;
use Aozora2Epub::Gensym;
use lib qw/./;
use t::Util;
# Declare one assertion per block of the __DATA__ section; the
# multiplication puts `blocks` in scalar context so it yields a count.
plan tests => 1 * blocks;
sub eval_unicode_notation {
my $s = shift;
$s =~ s|\\x\{([0-9a-fA-F]+)\}|chr(hex($1))|esg;
return $s;
}
# Filters applied to each data section: `html` and `expected` both go
# through 'chomp', and `expected` is then passed to eval_unicode_notation
# so that \x{...} escapes in the fixtures become real characters.
filters {
html => 'chomp',
expected => ['chomp', 'eval_unicode_notation'],
};
run {
my $block = shift;
Aozora2Epub::Gensym->reset_counter;
my $doc = Aozora2Epub->new($block->html, no_fetch_assets=>1);
my $got = join('', map { $_->as_html } @{$doc->files});
is_deeply($got, $block->expected, $block->name);
};
__DATA__
=== simple unicode
--- html
â»<span class="notes">ï¼»ï¼ãã¦ã¸ãï¼å»ããU+62BEã369-2ï¼½</span>
--- expected
\x{62be}
=== non gaiji note
--- html
ããã<span class="notes">ï¼»ï¼ ãããã¯ããï¼½</span>
--- expected
ããã<span class="notes">ï¼»ï¼ ãããã¯ããï¼½</span>
t/gaiji-replace.t view on Meta::CPAN
<img src="../../../gaiji/2-15/2-15-73.png" />
--- expected
<img src="../gaiji/2-15/2-15-73.png" />
=== kindle font broken jis 3
--- html
<img src="../../../gaiji/1-06/1-06-88.png" />
--- expected
<img src="../gaiji/1-06/1-06-88.png" />
=== kindle font broken unicode
--- html
â»<span class="notes">ï¼»ï¼ããããããU+2152ã369-2ï¼½</span>
--- expected
â»<span class="notes">ï¼»ï¼ããããããU+2152ã369-2ï¼½</span>
=== kindle font broken unicode 2
--- html
â»<span class="notes">ï¼»ï¼ããããããU+2189ã369-2ï¼½</span>
--- expected
â»<span class="notes">ï¼»ï¼ããããããU+2189ã369-2ï¼½</span>
=== kindle font broken unicode 3
--- html
â»<span class="notes">ï¼»ï¼ããããããU+26BDã369-2ï¼½</span>
--- expected
â»<span class="notes">ï¼»ï¼ããããããU+26BDã369-2ï¼½</span>
=== kindle font broken unicode 4
--- html
â»<span class="notes">ï¼»ï¼ããããããU+26BEã369-2ï¼½</span>
--- expected
â»<span class="notes">ï¼»ï¼ããããããU+26BEã369-2ï¼½</span>
=== kindle font broken unicode 5
--- html
â»<span class="notes">ï¼»ï¼ããããããU+3244ã369-2ï¼½</span>
--- expected
â»<span class="notes">ï¼»ï¼ããããããU+3244ã369-2ï¼½</span>
=== kindle font broken unicode over 0xffff
--- html
â»<span class="notes">ï¼»ï¼ããããããU+1F130ã369-2ï¼½</span>
--- expected
â»<span class="notes">ï¼»ï¼ããããããU+1F130ã369-2ï¼½</span>
=== kindle font broken unicode over 0xffff but ok
--- html
â»<span class="notes">ï¼»ï¼ããããããU+2a2b2ã369-2ï¼½</span>
--- expected
\x{2a2b2}
t/gaiji-title-author.t view on Meta::CPAN
use warnings;
use utf8;
use Test::More;
use Test::Base;
use Aozora2Epub::XHTML;
use lib qw/./;
use t::Util;
# Declare one assertion per block of the __DATA__ section; the
# multiplication puts `blocks` in scalar context so it yields a count.
plan tests => 1 * blocks;
sub eval_unicode_notation {
my $s = shift;
$s =~ s|\\x\{([0-9a-fA-F]+)\}|chr(hex($1))|esg;
return $s;
}
# Filters applied to each data section: `input` and `expected` both go
# through 'chomp', and `expected` is then passed to eval_unicode_notation
# so that \x{...} escapes in the fixtures become real characters.
filters {
input => 'chomp',
expected => ['chomp', 'eval_unicode_notation'],
};
run {
my $block = shift;
my $got = Aozora2Epub::XHTML::conv_gaiji_title_author($block->input);
is $got, $block->expected, $block->name;
};
__DATA__
t/gaiji-title-author.t view on Meta::CPAN
æãâ»ï¼»ï¼ãã¼ãæ°å1ã1-13-21ï¼½ã»â»ï¼»ï¼ãã¼ãæ°å2ã1-13-22ï¼½
--- expected
æã\x{2160}ã»\x{2161}
=== not kome
--- input
ï¼ï¼ï¼»ï¼ãï¼ãã¯ãã¼ãæ°åã1-13-22ï¼½ï¼
--- expected
ï¼\x{2161}ï¼
=== unicode
--- input
ãã¾â»ï¼»ï¼ããããã¸ãï¼æããU+897Bï¼½
--- expected
ãã¾\x{897b}
=== unicode bad font
--- input
失â»ï¼»ï¼ã人ãããï¼äºï¼å¿ããU+2B779ã表ç´ï¼½è¡è¬ç¾©
--- expected
失â»ï¼»ï¼ã人ãããï¼äºï¼å¿ããU+2B779ã表ç´ï¼½è¡è¬ç¾©
--- note
2b779ã¯kindleã ã¨è±è
ã«ãªã
=== no chuuki
--- input
ããããã
( run in 0.323 second using v1.01-cache-2.11-cpan-88abd93f124 )