Aozora2Epub
view release on metacpan or search on metacpan
lib/Aozora2Epub.pm view on Meta::CPAN
package Aozora2Epub;
use utf8;
use strict;
use warnings;
use Aozora2Epub::Gensym;
use Aozora2Epub::CachedGet qw/http_get/;
use Aozora2Epub::Epub;
use Aozora2Epub::XHTML;
use Path::Tiny;
use URI;
use HTML::Escape qw/escape_html/;
use base qw(Class::Accessor);
# Generate getter/setter methods for the object's fields (via Class::Accessor).
__PACKAGE__->mk_accessors(qw/files title author epub bib_info notation_notes/);
our $VERSION = '0.05';
# Base URL that gaiji image paths are resolved against in append().
our $AOZORA_GAIJI_URL = 'https://www.aozora.gr.jp/gaiji/';
# Prefix prepended by _get_content() to card/XHTML locations given without a scheme.
our $AOZORA_CARDS_URL = 'https://www.aozora.gr.jp/cards';
# Return the "directory" part of an XHTML location.
#
# The trailing "<name>.html" path segment is removed; a location that does
# not end in ".html" is returned unchanged.
sub _base_url {
    my ($location) = @_;

    # Strip only the last path segment, keeping the trailing slash before it.
    $location =~ s{[^/]+\.html$}{}s;

    return $location;
}
# Fetch the content behind a URL or a local path.
#
# - http(s) URLs are fetched through http_get().
# - Local paths ending in ".html" are read as UTF-8 text.
# - Any other local path is read as raw bytes.
#
# The argument may be an object (e.g. what _cat_url() returns); it is
# stringified before use.
sub _get_file {
    my $location = "" . shift; # concatenation forces stringification

    return http_get($location) if $location =~ m{^https?://};

    my $file = path($location);
    return $location =~ m{\.html$}
        ? $file->slurp_utf8
        : $file->slurp_raw;
}
# Turn the argument given to new()/append() into XHTML text.
#
# Accepted forms of $xhtml:
#   * a library card location (".../cardNNN.html"): the card page is fetched,
#     the link to the XHTML edition is looked up, and that page is loaded;
#   * an XHTML file location (".../files/NNN_NNN.html"): the file is fetched;
#   * anything else: treated as an XHTML fragment and wrapped in the
#     main_text <div>.
# Card and file locations without an http(s) scheme are taken to be relative
# to $AOZORA_CARDS_URL.
#
# Returns a two-element list: (XHTML text, base URL). The base URL is undef
# for an inline XHTML fragment.
#
# Croaks when a card page contains no link to the XHTML edition.
sub _get_content {
    my $xhtml = shift;

    if ($xhtml =~ m{/card\d+\.html$}) { # library card page
        unless ($xhtml =~ m{^https?://}) { # $xhtml should be \d+/card\d+.html
            $xhtml = "$AOZORA_CARDS_URL/$xhtml";
        }
        my $text = _get_file($xhtml);
        my $tree = Aozora2Epub::XHTML::Tree->new($text);
        my $xhtml_url;
        # The anchor whose text reads "read the XHTML edition now".
        $tree->process('//a[text()="いますぐXHTML版で読む"]' => sub {
            $xhtml_url = shift->attr('href');
        });
        # Without this guard an undefined href becomes an empty relative
        # reference, which resolves to the card URL itself, so the recursive
        # call below would never terminate.
        unless (defined $xhtml_url && length $xhtml_url) {
            require Carp;
            Carp::croak("no link to the XHTML edition found in $xhtml");
        }
        my $xhtml_uri = URI->new($xhtml_url)->abs(URI->new($xhtml));
        return _get_content($xhtml_uri->as_string);
    }

    if ($xhtml =~ m{/files/\d+_\d+\.html$}) { # XHTML file
        unless ($xhtml =~ m{^https?://}) { # $xhtml should be \d+/files/xxx_xxx.html
            $xhtml = "$AOZORA_CARDS_URL/$xhtml";
        }
        my $text = _get_file($xhtml);
        return ($text, _base_url($xhtml));
    }

    # XHTML string
    return (qq{<div class="main_text">$xhtml</div>}, undef);
}
# Constructor.
#
#   Aozora2Epub->new($content, %options)
#
# Creates an empty book and, when $content is true, appends it right away.
# %options are forwarded to append(); title => '' is passed last so that it
# overrides any caller-supplied title option for this initial content.
sub new {
    my ($class, $content, %options) = @_;

    my %fields = (
        files          => [],
        epub           => Aozora2Epub::Epub->new,
        title          => undef,
        author         => undef,
        bib_info       => '',
        notation_notes => '',
    );
    my $self = bless \%fields, $class;

    if ($content) {
        $self->append($content, %options, title => '');
    }

    return $self;
}
# Join $path onto $base.
#
# When $base is an http(s) URL the result is a URI object holding $path
# resolved against it; otherwise both are treated as file-system parts and a
# Path::Tiny object is returned.
sub _cat_url {
    my ($base, $path) = @_;

    if ($base =~ m{^https?://}) {
        my $base_uri = URI->new($base);
        return URI->new($path)->abs($base_uri);
    }

    return path($base, $path);
}
# Parse an XHTML fragment and return the list of elements it produced.
#
# The fragment is wrapped in the main_text <div> before it is handed to
# Aozora2Epub::XHTML; the contents of the resulting document are returned.
sub _build_elemlist_from_xhtml {
    my ($fragment) = @_;

    my $wrapped = qq{<div class="main_text">$fragment</div>};
    my $doc     = Aozora2Epub::XHTML->new_from_string($wrapped);

    return @{ $doc->contents };
}
sub append {
my ($self, $xhtml_like, %options) = @_;
my ($xhtml, $base_url) = _get_content($xhtml_like);
my $doc = Aozora2Epub::XHTML->new_from_string($xhtml);
unless ($options{no_fetch_assets}) {
for my $path (@{$doc->gaiji}) {
my $png = _get_file(_cat_url($AOZORA_GAIJI_URL, $path));
$self->epub->add_gaiji($png, $path);
}
for my $path (@{$doc->fig}) {
my $png = _get_file(_cat_url($base_url, $path));
$self->epub->add_image($png, $path);
}
}
my @files = $doc->split;
my $part_title;
if (defined $options{title_html}) {
$files[0]->insert_content(_build_elemlist_from_xhtml($options{title_html}));
} else {
unless (defined $options{title}) {
if ($options{use_subtitle}) {
$part_title = $doc->subtitle;
}
$part_title ||= $doc->title;
} elsif ($options{title} eq '') {
$part_title = undef;
} else {
lib/Aozora2Epub.pm view on Meta::CPAN
} else {
push @cur, @{$children};
}
next;
}
if ($lev < $level) {
$putback->($c);
return \@cur;
}
push @cur, {
name => gensym,
level => $lev,
id => $e->attr('id'),
title => $e->as_text,
file => $c->{file},
};
}
return \@cur;
}
# Build the table of contents from the book's files.
#
# Obtains the pair returned by _make_content_iterator() for the file list
# and hands it to _toc(), starting at level 1.
sub _make_toc {
    my ($self) = @_;

    my ($fetch, $unfetch) = _make_content_iterator($self->{files});

    return _toc(1, $fetch, $unfetch);
}
# Get or set the table of contents.
#
#   $book->toc        returns the stored TOC, building it with _make_toc()
#                     the first time it is requested
#   $book->toc($toc)  stores $toc (when true) and returns it
sub toc {
    my ($self, $toc) = @_;

    # Setter form.
    if ($toc) {
        return $self->{toc} = $toc;
    }

    # Getter form: build lazily and cache.
    $self->{toc} = $self->_make_toc unless $self->{toc};
    return $self->{toc};
}
# Build the EPUB and save it.
#
# Options:
#   output  file name to save to; defaults to "<title>.epub"
#   cover   when true, passed to the EPUB builder's set_cover()
sub to_epub {
    my ($self, %options) = @_;

    my $output = $options{output} || ($self->title . ".epub");

    $self->epub->set_cover($options{cover}) if $options{cover};

    $self->epub->build_from_doc($self);
    return $self->epub->save($output);
}
# Return the whole book as one HTML string: the as_html output of every
# file, concatenated in order with no separator.
sub as_html {
    my ($self) = @_;

    my @chunks;
    for my $file (@{ $self->files }) {
        push @chunks, $file->as_html;
    }

    return join('', @chunks);
}
1;
__END__
=encoding utf-8
=head1 NAME
Aozora2Epub - Convert Aozora Bunko XHTML to EPUB
=head1 SYNOPSIS
use Aozora2Epub;
my $book = Aozora2Epub->new("https://www.aozora.gr.jp/cards/000262/files/48074_40209.html");
$book->to_epub;
# 合本の作成
$book = Aozora2Epub->new();
$book->append("000879/card179.html"); # 藪の中
$book->append("000879/card127.html"); # 羅生門
$book->title('芥川竜之介作品集');
$book->to_epub;
=head1 DESCRIPTION
Aozora2Epub は青空文庫のXHTML形式の本をEPUBに変換するモジュールです。
簡単に合本を生成するためのインタフェースも提供しています。
=head1 METHODS
=head2 new
my $book = Aozora2Epub->new($book_url);
my $book = Aozora2Epub->new($xhtml_string);
my $book = Aozora2Epub->new(); # 空のドキュメントを作る
C<$book_url>で指定した青空文庫の本を読み込みます。
あるいは、文字列として指定された整形式のXHTMLを本の内容として読み込みます。
本は以下のいずれかの形式で指定します。
いずれも、URL先頭の C<https://www.aozora.gr.jp/cards/>の部分を省略することが可能です。
=over 4
=item 図書カードのURL
青空文庫の図書カードのURLです。以下に例を示します。
https://www.aozora.gr.jp/cards/001569/card59761.html
001569/card59761.html # URLの先頭部分を省略
=item XHTMLのURL
青空文庫のXHTMLファイルのURLです。以下に例を示します。
https://www.aozora.gr.jp/cards/001569/files/59761_74795.html
001569/files/59761_74795.html # URLの先頭部分を省略
=back
=head2 append
$book->append($book_url); # 追加する本のタイトルを章タイトルとして使用
$book->append($book_url, use_subtitle=>1); # 追加する本のサブタイトルを章タイトルとして使用
( run in 2.064 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )