Renard-Block-Format-PDF
view release on metacpan or search on metacpan
lib/Renard/Block/Format/PDF/Document.pm view on Meta::CPAN
use Renard::Incunabula::Common::Setup;
package Renard::Block::Format::PDF::Document;
# ABSTRACT: document that represents a PDF file
$Renard::Block::Format::PDF::Document::VERSION = '0.005';
use Moo;
use Renard::API::MuPDF::mutool;
use Renard::Block::Format::PDF::Page;
use Renard::Incunabula::Outline;
use Renard::Incunabula::Document::Types qw(PageNumber ZoomLevel);
use Renard::Incunabula::Common::Types qw(InstanceOf);
use Math::Trig;
use Math::Polygon;
use String::Tagged;
use Function::Parameters;
extends qw(Renard::Incunabula::Document);
# Lazily-built attribute; its value is whatever _build__raw_bounds returns.
# The builder name is spelled out (note the double underscore: "_build_"
# followed by the attribute name "_raw_bounds") so that it is greppable.
has _raw_bounds => ( is => 'lazy', builder => '_build__raw_bounds' );
method _build_last_page_number() :ReturnType(PageNumber) {
	# Returns the number of entries in the C<page> list of the page info
	# for this document's file.
	#
	# The page info is read through the lazy C<_raw_bounds> attribute,
	# whose builder makes the same get_mutool_page_info_xml call for
	# C<< $self->filename >>.  Going through the attribute means the data
	# is fetched only once per document and then shared with
	# _build_identity_bounds, instead of being queried a second time here.
	my $page_info = $self->_raw_bounds;

	return scalar @{ $page_info->{page} };
}
method get_rendered_page( (PageNumber) :$page_number, (ZoomLevel) :$zoom_level = 1.0 ) {
	# Creates the page object for C<$page_number> of this document at the
	# requested C<$zoom_level> (1.0 when the caller does not pass one).
	my %page_args = (
		document    => $self,
		page_number => $page_number,
		zoom_level  => $zoom_level,
	);

	return Renard::Block::Format::PDF::Page->new( %page_args );
}
method _build_outline() {
	# Wraps the outline data that get_mutool_outline_simple returns for
	# this document's file in a Renard::Incunabula::Outline object.
	my $outline_items =
		Renard::API::MuPDF::mutool::get_mutool_outline_simple( $self->filename );

	return Renard::Incunabula::Outline->new(
		items => $outline_items,
	);
}
method _build__raw_bounds() {
	# Builder for the lazy C<_raw_bounds> attribute: returns the page info
	# structure that get_mutool_page_info_xml produces for this document's
	# file, unmodified.
	my $page_info = Renard::API::MuPDF::mutool::get_mutool_page_info_xml( $self->filename );

	return $page_info;
}
method _build_identity_bounds() {
	# Derives per-page dimensions from the raw page info in C<_raw_bounds>.
	# (Presumably the builder of an C<identity_bounds> attribute declared
	# outside this file -- confirm against the parent class.)
	#
	# Returns an ArrayRef holding one HashRef per entry of the raw
	# C<page> list, with the keys:
	#
	#   x, y    CropBox width (r - l) and height (t - b)
	#   rotate  the entry's Rotate value, or 0 when it is undefined
	#   pageno  the entry's pagenum
	#   dims    { w => ..., h => ... }: extent of the bounding box of the
	#           x-by-y rectangle after rotating it about its centre

	# Size of the axis-aligned bounding box of a $width-by-$height
	# rectangle once rotated by $degrees around its own centre.
	my $rotated_dims = sub {
		my ($width, $height, $degrees) = @_;

		my $rect = Math::Polygon->new(
			points => [
				[0, 0],
				[$width, 0],
				[$width, $height],
				[0, $height],
			],
		);
		my ($xmin, $ymin, $xmax, $ymax) = $rect->rotate(
			degrees => $degrees,
			center  => [ $width/2, $height/2 ],
		)->bbox;

		return { w => $xmax - $xmin, h => $ymax - $ymin };
	};

	my $bounds = $self->_raw_bounds;

	my @page_xy = map {
		my $x      = $_->{CropBox}{r} - $_->{CropBox}{l};
		my $y      = $_->{CropBox}{t} - $_->{CropBox}{b};
		my $rotate = $_->{Rotate}{v} // 0;

		# rotate is always set (defaulted to 0 above), so the rotated
		# dimensions are computed for every page unconditionally.
		my $page = {
			x      => $x,
			y      => $y,
			rotate => $rotate,
			pageno => $_->{pagenum},
			dims   => $rotated_dims->( $x, $y, $rotate ),
		};
		$page;
	} @{ $bounds->{page} };

	return \@page_xy;
}
method get_textual_page( (PageNumber) $page_number )
		:ReturnType(InstanceOf['String::Tagged']) {
	# Builds a String::Tagged for page C<$page_number> of this document.
	#
	# The structured-text data comes from get_mutool_text_stext_xml and is
	# handed to _walk_page_data together with the fresh String::Tagged,
	# a starting depth of 0 and the list of nesting level names.
	my @level_names = qw(document page block line font char);

	my $stext_data = Renard::API::MuPDF::mutool::get_mutool_text_stext_xml(
		$self->filename,
		$page_number
	);

	my $tagged_page = String::Tagged->new;
	_walk_page_data( $tagged_page, $stext_data, 0, \@level_names );

	return $tagged_page;
}
fun _walk_page_data( $tagged, $data, $depth, $levels ) {
my $level_tagged = String::Tagged->new("");
if( $depth == @$levels - 1 ) {
# last level is the character, so we append that to the string
$level_tagged .= $data->{c};
} else {
# empty pages will not have this data
( run in 1.753 second using v1.01-cache-2.11-cpan-39bf76dae61 )