Renard-Block-Format-PDF

 view release on metacpan or  search on metacpan

lib/Renard/Block/Format/PDF/Document.pm  view on Meta::CPAN

use Renard::Incunabula::Common::Setup;
package Renard::Block::Format::PDF::Document;
# ABSTRACT: document that represents a PDF file
$Renard::Block::Format::PDF::Document::VERSION = '0.005';
use Moo;
use Renard::API::MuPDF::mutool;
use Renard::Block::Format::PDF::Page;
use Renard::Incunabula::Outline;
use Renard::Incunabula::Document::Types qw(PageNumber ZoomLevel);
use Renard::Incunabula::Common::Types qw(InstanceOf);

use Math::Trig;
use Math::Polygon;

use String::Tagged;

use Function::Parameters;

extends qw(Renard::Incunabula::Document);

# Cached result of the mutool page-info query for this document's file.
# Declared lazy, so the query only runs the first time the attribute is read.
has _raw_bounds => (
	is => 'lazy', # built by _build__raw_bounds (note the double underscore)
);

# Returns the number of entries under the C<page> key of the mutool
# page-info result for this document's file.
# NOTE(review): presumably the builder for a C<last_page_number> attribute
# declared by the parent class -- confirm there.
method _build_last_page_number() :ReturnType(PageNumber) {
	my $page_info = Renard::API::MuPDF::mutool::get_mutool_page_info_xml( $self->filename );
	my $pages     = $page_info->{page};

	return scalar @$pages;
}

# Constructs a Renard::Block::Format::PDF::Page for this document.
#
# Named parameters:
#   page_number - (PageNumber) the page to wrap
#   zoom_level  - (ZoomLevel) passed through to the page; defaults to 1.0
#
# Returns the newly constructed page object.
method get_rendered_page( (PageNumber) :$page_number, (ZoomLevel) :$zoom_level = 1.0 ) {
	my %page_args = (
		document    => $self,
		page_number => $page_number,
		zoom_level  => $zoom_level,
	);

	return Renard::Block::Format::PDF::Page->new( %page_args );
}

# Wraps the outline data that mutool reports for this document's file in a
# Renard::Incunabula::Outline object and returns it.
method _build_outline() {
	my $items = Renard::API::MuPDF::mutool::get_mutool_outline_simple( $self->filename );

	return Renard::Incunabula::Outline->new( items => $items );
}

# Builder for the lazy C<_raw_bounds> attribute: returns whatever the mutool
# page-info query yields for this document's file.
method _build__raw_bounds() {
	# Assign to a scalar first so the query is always made in scalar context,
	# regardless of the context this builder is called in.
	my $page_info = Renard::API::MuPDF::mutool::get_mutool_page_info_xml( $self->filename );

	return $page_info;
}

# Computes per-page size information from the raw mutool page info.
#
# Returns an array reference with one hash reference per entry of
# C<< $self->_raw_bounds->{page} >>, each containing:
#
#   x      - CropBox r minus CropBox l
#   y      - CropBox t minus CropBox b
#   rotate - the page's Rotate value, or 0 when it has none
#   pageno - the entry's pagenum
#   dims   - { w => ..., h => ... }: size of the axis-aligned bounding box
#            of the x-by-y rectangle after it is rotated by C<rotate>
#            degrees about its centre
#
# NOTE(review): the CropBox keys l/r/t/b presumably mean left/right/top/bottom;
# confirm against the mutool output format.
method _build_identity_bounds() {
	# Size of the bounding box of a page rectangle once its rotation is applied.
	my $compute_rotate_dim = sub {
		my ($info) = @_;
		my $theta_deg = $info->{rotate} // 0;

		my ($x, $y) = ($info->{x}, $info->{y});
		my $poly = Math::Polygon->new(
			points => [
				[0, 0],
				[$x, 0],
				[$x, $y],
				[0, $y],
			],
		);

		# Rotate about the centre of the rectangle; only the extent of the
		# result is used below, not its position.
		my $rotated_poly = $poly->rotate(
			degrees => $theta_deg,
			center => [ $x/2, $y/2 ],
		);

		my ($xmin, $ymin, $xmax, $ymax) = $rotated_poly->bbox;

		return { w => $xmax - $xmin, h => $ymax - $ymin };
	};

	my $bounds = $self->_raw_bounds;
	my @page_xy = map {
		my $p = {
			x => $_->{CropBox}{r}-$_->{CropBox}{l},
			y => $_->{CropBox}{t}-$_->{CropBox}{b},
			rotate => $_->{Rotate}{v} // 0,
			pageno => $_->{pagenum},
		};

		# The rotate key is always set above (defaulting to 0), so the
		# rotated dimensions are computed for every page.
		$p->{dims} = $compute_rotate_dim->( $p );

		$p;
	} @{ $bounds->{page} };

	return \@page_xy;
}

# Builds the text of one page as a String::Tagged object.
#
# Parameters:
#   $page_number - (PageNumber) the page whose text is extracted
#
# Fetches mutool's structured-text data for the page and hands it, together
# with a fresh String::Tagged, to _walk_page_data starting at depth 0.
# Returns that String::Tagged object.
method get_textual_page( (PageNumber) $page_number )
		:ReturnType(InstanceOf['String::Tagged']) {
	my $tagged_text = String::Tagged->new;

	my $structured_text = Renard::API::MuPDF::mutool::get_mutool_text_stext_xml(
		$self->filename,
		$page_number,
	);

	# Level names handed to the walker, listed from outermost to innermost.
	my @levels = qw(document page block line font char);
	_walk_page_data( $tagged_text, $structured_text, 0, \@levels );

	return $tagged_text;
}

fun _walk_page_data( $tagged, $data, $depth, $levels ) {
	my $level_tagged = String::Tagged->new("");

	if( $depth == @$levels - 1 ) {
		# last level is the character, so we append that to the string
		$level_tagged .= $data->{c};
	} else {
		# empty pages will not have this data



( run in 1.753 second using v1.01-cache-2.11-cpan-39bf76dae61 )