App-Kramerius-To-Images
view release on metacpan or search on metacpan
package App::Kramerius::To::Images;
use strict;
use warnings;
use App::Kramerius::V4;
use Class::Utils qw(set_params);
use Cwd qw(cwd);
use Data::Kramerius;
use English;
use Error::Pure qw(err);
use File::Spec::Functions qw(catfile);
use Getopt::Std;
use HTTP::Request;
use IO::Barf qw(barf);
use JSON::XS;
use LWP::UserAgent;
use METS::Files;
use Perl6::Slurp qw(slurp);
our $VERSION = 0.04;
# Constructor.
sub new {
my ($class, @params) = @_;
# Create object.
my $self = bless {}, $class;
# Directory to store files.
$self->{'dir_to_store_files'} = undef;
# LWP::UserAgent object.
$self->{'lwp_user_agent'} = undef;
# Process parameters.
set_params($self, @params);
$self->{'_kramerius'} = Data::Kramerius->new;
if (defined $self->{'lwp_user_agent'}) {
if (! $self->{'lwp_user_agent'}->isa('LWP::UserAgent')) {
err "Parameter 'lwp_user_agent' must be a LWP::UserAgent instance.";
}
} else {
$self->{'lwp_user_agent'} = LWP::UserAgent->new;
$self->{'lwp_user_agent'}->agent('kramerius2images/'.$VERSION);
}
# Object.
return $self;
}
# Run.
sub run {
my $self = shift;
# Process arguments.
$self->{'_opts'} = {
'h' => 0,
'q' => 0,
'v' => 0,
};
if (! getopts('hqv', $self->{'_opts'}) || (! -r 'ROOT' && @ARGV < 2)
|| $self->{'_opts'}->{'h'}) {
print STDERR "Usage: $0 [-h] [-q] [-v] [--version] [kramerius_id object_id]\n";
print STDERR "\t-h\t\tHelp.\n";
print STDERR "\t-q\t\tQuiet mode.\n";
print STDERR "\t-v\t\tVerbose mode.\n";
print STDERR "\t--version\tPrint version.\n";
print STDERR "\tkramerius_id\tKramerius system id. e.g. ".
"mzk\n";
print STDERR "\tobject_id\tKramerius object id (could be ".
"page, series or book edition).\n";
return 1;
);
# Get 'img' files.
my @page_uris = $obj->get_use_files('img');
if (! @page_uris) {
err 'No images to download.';
}
# Get images.
foreach my $page (@page_uris) {
my $uri = URI->new($page);
my @path_segments = $uri->path_segments;
if (! -r $path_segments[-1]) {
if (! $self->{'_opts'}->{'q'}) {
print "$page\n";
}
$self->_download($page, catfile($dir_to_store_files, $path_segments[-1]));
}
# Strip URI part.
push @pages, $path_segments[-1];
}
# Direct file.
} else {
# TODO Stahnout primo soubor. Udelat na to skript.
err "Cannot get '$mets_uri' URI.",
'HTTP code', $res->code,
'Message', $res->message;
}
} elsif ($self->{'_kramerius_obj'}->version == 4) {
# URI for children JSON.
my $json_uri = $self->{'_kramerius_obj'}->url.'search/api/v5.0/item/uuid:'.
$object_id.'/children';
# Get JSON.
my $req = HTTP::Request->new('GET' => $json_uri);
my $res = $self->{'lwp_user_agent'}->request($req);
my $json;
if ($res->is_success) {
$json = $res->content;
barf(catfile($dir_to_store_files, $object_id.'.json'), $json);
} else {
err "Cannot get '$json_uri' URI.",
'HTTP code', $res->code,
'message', $res->message;
}
# Check JSON content type.
if ($res->headers->content_type ne 'application/json') {
err "Content type isn't 'application/json' for '$json_uri' URI.",
'Content-Type', $res->headers->content_type;
}
# Get perl structure.
my $json_ar = eval {
JSON::XS->new->decode($json);
};
if ($EVAL_ERROR) {
err "Cannot parse JSON on '$json_uri' URI.",
'JSON decode error', $EVAL_ERROR;
}
# Each page.
my $images = 0;
foreach my $page_hr (@{$json_ar}) {
if ($page_hr->{'model'} ne 'page') {
next;
}
my $title = $self->_get_page_title($page_hr);
my $pid = $page_hr->{'pid'};
$pid =~ s/^uuid://ms;
# TODO Support for page number in $pid =~ uuid:__uuid__@__page_number__ (PDF and number of page in PDF)
if (! $self->{'_opts'}->{'q'}) {
print "$pid: $title\n";
}
# XXX Support of jpg only
if (! -r $pid.'.jpg') {
my $out_file = '-o '.catfile($dir_to_store_files, $pid.'.jpg');
$self->_do_command("kramerius4 $out_file $quiet $kramerius_id $pid");
}
push @pages, $pid.'.jpg';
$images++;
}
# One page.
if ($images == 0) {
my $pid = $object_id;
if (! $self->{'_opts'}->{'q'}) {
print "$pid: ?\n";
}
# XXX Support of jpg only
my $output_file = $pid.'.jpg';
if (! -r $output_file) {
my $out_file = '-o '.catfile($dir_to_store_files, $output_file);
$self->_do_command("kramerius4 $out_file $quiet $kramerius_id $pid");
}
push @pages, $output_file;
}
} else {
err 'Bad version of Kramerius.',
'Kramerius version', $self->{'_kramerius_obj'}->version;
}
barf(catfile($dir_to_store_files, 'LIST'), join "\n", @pages);
return 0;
}
sub _download {
my ($self, $uri, $local_file) = @_;
$self->{'lwp_user_agent'}->get($uri,
':content_file' => $local_file,
);
return;
}
Returns 1 for error, 0 for success.
=head1 ERRORS
new():
From Class::Utils::set_params():
Unknown parameter '%s'.
Parameter 'lwp_user_agent' must be a LWP::UserAgent instance.
run():
Bad version of Kramerius.
Kramerius version: %s
Cannot get title for page '%s'.
Cannot get '%s' URI.
HTTP code: %s
message: %s
Cannot parse JSON on '%s' URI.
JSON decode error: %s
Cannot read library id and work id.
Content type isn't 'application/json' for '%s' URI.
Content-Type: %s
Library with ID '%s' is unknown.
No images to download.
=head1 EXAMPLE
=for comment filename=print_help.pl
use strict;
use warnings;
use App::Kramerius::To::Images;
# Arguments.
@ARGV = (
'-h',
);
# Run.
exit App::Kramerius::To::Images->new->run;
# Output like:
# Usage: ./ex1.pl [-h] [-q] [-v] [--version] [kramerius_id object_id]
# -h Help.
# -q Quiet mode.
# -v Verbose mode.
# --version Print version.
# kramerius_id Kramerius system id. e.g. mzk
# object_id Kramerius object id (could be page, series or book edition).
=head1 DEPENDENCIES
L<App::Kramerius::V4>,
L<Class::Utils>,
L<Data::Kramerius>,
L<English>,
L<Error::Pure>,
L<Getopt::Std>,
L<HTTP::Request>,
L<IO::Barf>,
L<JSON::XS>,
L<LWP::UserAgent>,
L<METS::Files>,
L<Perl6::Slurp>.
=head1 SEE ALSO
=over
=item L<Task::Kramerius>
Install modules for Kramerius system.
=item L<App::Images::To::DjVu>
Base class for images2djvu script.
=back
=head1 REPOSITORY
L<https://github.com/michal-josef-spacek/App-Kramerius-To-Images>
=head1 AUTHOR
Michal Josef Å paÄek L<mailto:skim@cpan.org>
L<http://skim.cz>
=head1 LICENSE AND COPYRIGHT
© 2021 Michal Josef Å paÄek
BSD 2-Clause License
=head1 VERSION
0.04
=cut
( run in 0.654 second using v1.01-cache-2.11-cpan-39bf76dae61 )