Aozora2Epub

 view release on metacpan or  search on metacpan

MANIFEST  view on Meta::CPAN

t/okuzuke.t
t/toc.t
xt/ZipDiff.pm
xt/epub.t
xt/expected/01_000.epub
xt/expected/02_000-with-cover.epub
xt/expected/02_000.epub
xt/expected/04_000.epub
xt/gappon.t
xt/input/01/files/01_000.html
xt/input/01/files/fig0.png
xt/input/02/files/02_000.html
xt/input/03/files/03_000.html
xt/input/04/files/04_000.html
xt/input/cover.jpg
xt/input/gaiji/1-90/1-90-61.png
xt/live.t

lib/Aozora2Epub.pm  view on Meta::CPAN

}

sub append {
    my ($self, $xhtml_like, %options) = @_;

    my ($xhtml, $base_url) = _get_content($xhtml_like);
    my $doc = Aozora2Epub::XHTML->new_from_string($xhtml);

    unless ($options{no_fetch_assets}) {
        for my $path (@{$doc->gaiji}) {
            my $png = _get_file(_cat_url($AOZORA_GAIJI_URL, $path));
            $self->epub->add_gaiji($png, $path);
        }
        for my $path (@{$doc->fig}) {
            my $png = _get_file(_cat_url($base_url, $path));
            $self->epub->add_image($png, $path);
        }
    }
    my @files = $doc->split;
    my $part_title;
    if (defined $options{title_html}) {
        $files[0]->insert_content(_build_elemlist_from_xhtml($options{title_html}));
    } else {
        unless (defined $options{title}) {
            if ($options{use_subtitle}) {
                $part_title = $doc->subtitle;

lib/Aozora2Epub/XHTML.pm  view on Meta::CPAN

        $indent .= " " . $h->attr('style') if $h->attr('style');
        $h->attr('style', $indent);
        $parent->replace_with($h);
    }
}

sub _process_img {
    my $img = shift;

    my $src = $img->attr('src');
    if ($src =~ m{/(gaiji/\d-\d+/(\d)-(\d\d)-(\d\d)\.png)$}) {
        my $ch = kindle_jis2chr(0+$2, 0+$3, 0+$4);
        if ($ch) {
            $img->replace_with($ch);
            return;
        }
        $img->attr('src', "../$1");
        return $src;
    }
    # normal image
    $img->attr('src', "../images/$src");

lib/Aozora2Epub/XHTML.pm  view on Meta::CPAN

                return;
            }
        })
        ->as_list;

    # 先頭の<br/>の連続は削除
    while ($contents[0] && _is_empty($contents[0])) { shift @contents; };

    my (@gaiji, @fig);
    for my $path (@images) {
        if ($path =~ m{gaiji/(.+\.png)$}) {
            push @gaiji, $1;
        } else {
            push @fig, $path;
        }
    }
    $self->title(conv_gaiji_title_author($title));
    $self->subtitle(conv_gaiji_title_author($subtitle));
    $self->author(conv_gaiji_title_author($author));
    $self->contents(\@contents);
    $self->bib_info($bib_info || '');

share/basic/EPUB/content.opf  view on Meta::CPAN

    : if $has_okuzuke {
    <item id="okuzuke_xhtml" href="text/okuzuke.xhtml" media-type="application/xhtml+xml" />
    : }
    : if $has_sections {
    <item id="toc" href="toc.xhtml" media-type="application/xhtml+xml" />
    : }
    : for $files -> $c {
    <item id="<: $c.name :>_xhtml" href="text/<: $c.name :>.xhtml" media-type="application/xhtml+xml" />
    : }
    : for $assets -> $c {
    <item id="<: $c.name :>" href="<: $c.value :>" media-type="image/png" />
    : }
  </manifest>
  <spine toc="ncx" page-progression-direction="rtl">
    <itemref idref="title_page_xhtml" linear="yes" />
    : if $has_sections {
    <itemref idref="toc" />
    : }
    : for $files -> $c {
    <itemref idref="<: $c.name :>_xhtml" />
    : }

t/files.t  view on Meta::CPAN

<br />
<div class="jisage_2" style="margin-left: 2em">
  <h3 class="o-midashi"><a class="midashi_anchor" id="midashi560">先生と私</a></h3>
</div>
<br />
--- expected
- '<h3 class="o-midashi" id="midashi560" style="text-indent: 2em">先生と私</h3><br />'

=== figure with caption
--- html
<img class="illustration" src="fig4990_07.png" alt="XXX のキャプション付きの図" /><br />
<span class="caption">XXX</span><br />
--- expected
- |
  <figure>
    <img alt="XXX のキャプション付きの図" class="illustration" src="../images/fig4990_07.png" />
    <figcaption class="caption">XXX</figcaption>
  </figure>
  <br />

=== jisage
--- html
<div style="margin-left: 3em">あいうえお</div>
--- expected
- '<div style="margin-top: 3em">あいうえお</div>'

t/gaiji-replace.t  view on Meta::CPAN

\x{6fde}

=== corrupted jis
--- html
ああ<span class="notes">[#「さんずい+鼾のへん」、第4水準2-79-37]</span>
--- expected
ああ<span class="notes">[#「さんずい+鼾のへん」、第4水準2-79-37]</span>

=== image gaiji in rb
--- html
博物学者は<ruby><rb>※<img src="../../../gaiji/1-91/1-91-65.png" alt="※(「虫+斯」、第3水準1-91-65)" class="gaiji" /></rb><rp>(</rp><rt>けむし</rt><rp>)</rp></ruby><span class="notes">[#「虫+占」、U+86C5、18-5]</...
--- expected
博物学者は<ruby><rb>\x{86c5}\x{87d6}</rb><rp>(</rp><rt>けむし</rt><rp>)</rp></ruby>の変じ

=== kindle font broken jis
--- html
<img src="../../../gaiji/1-90/1-90-61.png" />
--- expected
<img src="../gaiji/1-90/1-90-61.png" />

=== kindle font broken jis 2
--- html
<img src="../../../gaiji/2-15/2-15-73.png" />
--- expected
<img src="../gaiji/2-15/2-15-73.png" />

=== kindle font broken jis 3
--- html
<img src="../../../gaiji/1-06/1-06-88.png" />
--- expected
<img src="../gaiji/1-06/1-06-88.png" />

=== kindle font broken unicode
--- html
※<span class="notes">[#「あああ」、U+2152、369-2]</span>
--- expected
※<span class="notes">[#「あああ」、U+2152、369-2]</span>

=== kindle font broken unicode 2
--- html
※<span class="notes">[#「あああ」、U+2189、369-2]</span>

xt/gappon.t  view on Meta::CPAN

    };
}
done_testing();

# Fixture: expected markup for the first test book, returned as one string
# with every newline removed (the heredoc is split over lines only for
# readability). Leading spaces on the text lines are part of the data.
sub book1 {
    (my $flattened = <<'HTML') =~ tr/\n//d;
<h2 id="g000000006">テスト1</h2>
<h3 id="g000000000">大見出し1</h3>
<h4 id="g000000002">中見出し1-1</h4>
 あれや。これや。<br />
<img src="../images/fig0.png" /><br />
 図その1。<br />
<h3 id="g000000001">大見出し2</h3>
<h4 id="g000000003">中見出し2-1</h4>
 どれや。それや。<img class="gaiji" src="../gaiji/1-90/1-90-61.png" />ですね。<br />
<h2 id="g000000008">テスト-no-toc</h2>
 あれや。これや。<br />
 どれや。それや。ですね。<br />
HTML
    return $flattened;
}

# Fixture: expected markup for the second test book ("part1" / "part2"
# under h2 headings), returned as a single string with all newlines removed.
sub book2 {
    (my $flattened = <<'HTML') =~ tr/\n//d;
<h2 id="g000000006">part1</h2>
<h3 id="g000000000">大見出し1</h3>
<h4 id="g000000002">中見出し1-1</h4>
 あれや。これや。<br />
<img src="../images/fig0.png" /><br />
 図その1。<br />
<h3 id="g000000001">大見出し2</h3>
<h4 id="g000000003">中見出し2-1</h4>
 どれや。それや。<img class="gaiji" src="../gaiji/1-90/1-90-61.png" />ですね。<br />
<h2 id="g000000008">part2</h2>
 あれや。これや。<br />
 どれや。それや。ですね。<br />
HTML
    return $flattened;
}

# Fixture: expected markup for the third test book. Same body as the other
# fixtures, but the first part heading is an h1 and the second an h2.
# Returned as a single string with all newlines removed.
sub book3 {
    (my $flattened = <<'HTML') =~ tr/\n//d;
<h1 id="g000000006">part1</h1>
<h3 id="g000000000">大見出し1</h3>
<h4 id="g000000002">中見出し1-1</h4>
 あれや。これや。<br />
<img src="../images/fig0.png" /><br />
 図その1。<br />
<h3 id="g000000001">大見出し2</h3>
<h4 id="g000000003">中見出し2-1</h4>
 どれや。それや。<img class="gaiji" src="../gaiji/1-90/1-90-61.png" />ですね。<br />
<h2 id="g000000008">part2</h2>
 あれや。これや。<br />
 どれや。それや。ですね。<br />
HTML
    return $flattened;
}

sub book4 {
    my $html =<<'HTML';

xt/input/01/files/01_000.html  view on Meta::CPAN

<div class="metadata">
<h1 class="title">テスト1</h1>
<h2 class="author">酔狂亭不出来</h2>
<br />
<br />
</div>
<div class="main_text"><br />
<h3>大見出し1</h3>
<h4>中見出し1-1</h4>
あれや。これや。<br />
<img src="fig0.png" /><br />
図その1。<br />
<h3>大見出し2</h3>
<h4>中見出し2-1</h4>
どれや。それや。<img class="gaiji" src="../../../gaiji/1-90/1-90-61.png" />ですね。<br />
</div>
<div class="bibliographical_information">
<hr />
<br />
これは奥付です。<br />
<br />
<br />
</div>
</body>
</html>

xt/live.t  view on Meta::CPAN

    is $book->title, $title, "title  $url";
    is $book->author, $author, "author $url";
}

# Live checks against www.aozora.gr.jp, run with a throw-away cache directory.
{
    # Point the cache at a fresh temporary directory for the duration of this
    # block only; `local` restores the previous value on exit.
    local $ENV{AOZORA2EPUB_CACHE} = Path::Tiny->tempdir;
    # Called after the environment variable is set -- presumably so the cache
    # layer picks up the new location (confirm in Aozora2Epub::CachedGet).
    Aozora2Epub::CachedGet::init_cache();

    # The same work addressed two ways: by its XHTML file path and by its
    # card page. Both are expected to yield the same title and author.
    dotest('001637/files/59055_69954.html', 'ある日', '中野鈴子');
    dotest('001637/card59055.html', 'ある日', '中野鈴子');
    # Fetch the same gaiji PNG twice and compare against the embedded bytes.
    # Per the test label, the second fetch is meant to exercise the cached path.
    is(http_get('https://www.aozora.gr.jp/gaiji/1-01/1-01-35.png'), png_1_01_35(), "png");
    is(http_get('https://www.aozora.gr.jp/gaiji/1-01/1-01-35.png'), png_1_01_35(), "png cached");
}

done_testing();

# Reference bytes for the gaiji image 1-01-35.png, used to verify what
# http_get() returns. The literal is grouped by the markers visible in the
# data (PNG signature, IHDR, PLTE, IDAT, IEND); the concatenation is
# byte-for-byte the 95-byte string the test compares against.
sub png_1_01_35 {
    my @pieces = (
        qq{\211PNG\r\n\032\n},
        qq{\0\0\0\rIHDR\0\0\0\020\0\0\0\020\001\003\0\0\0%=m"},
        qq{\0\0\0\006PLTE\377\377\377\0\0\0U\302\323~},
        qq{\0\0\0\024IDATx\234b`h` \005\001\0\0\0\377\377\003\0\2740\b\001\264:\a\277},
        qq{\0\0\0\0IEND\256B`\202},
    );
    return join '', @pieces;
}



( run in 1.765 second using v1.01-cache-2.11-cpan-df04353d9ac )