CAM-PDF
view release on metacpan or search on metacpan
lib/CAM/PDF.pm view on Meta::CPAN
for my $line (split /\n/xms, $text) # trailing null strings omitted
{
$lines++;
my $w = $self->getStringWidth($fontmetrics, $line);
if ($w && $w > $stringwidth)
{
$stringwidth = $w;
}
}
$lines ||= 1;
# Initial guess
$fontsize = ($dy - 2 * $border) / ($lines * 1.5);
my $fontwidth = $fontsize * $stringwidth;
my $maxwidth = $dx - 2 * $border;
if ($fontwidth > $maxwidth)
{
$fontsize *= $maxwidth / $fontwidth;
}
# allow for user override
if (exists $opts->{max_autoscale_fontsize} && $fontsize > $opts->{max_autoscale_fontsize}) {
lib/CAM/PDF.pm view on Meta::CPAN
# escape characters
$text = $self->writeString($text);
if ($flags{Multiline})
{
# TODO: wrap the field with wrapString()??
# Shawn Dawson of Silent Solutions pointed out that this does not auto-wrap the input text
my $linebreaks = $text =~ s/ \\n /\) Tj T* \(/gxms;
# Total guesswork:
# line height is either 150% of fontsize or thrice
# the corner offset
$tl = $fontsize ? $fontsize * 1.5 : $ty * 3;
# Bottom aligned
#$ty += $linebreaks * $tl;
# Top aligned
$ty = $dy - $border - $tl;
if ($flags{Justify} ne 'left')
lib/CAM/PDF/PageText.pm view on Meta::CPAN
=head1 SYNOPSIS
my $pdf = CAM::PDF->new($filename);
my $pageone_tree = $pdf->getPageContentTree(1);
print CAM::PDF::PageText->render($pageone_tree);
=head1 DESCRIPTION
This module attempts to extract sequential text from a PDF page. This
is not a robust process, as PDF text is graphically laid out in
arbitrary order. This module uses a few heuristics to try to guess
what text goes next to what other text, but may be fooled easily by,
say, subscripts, non-horizontal text, changes in font, form fields,
etc.
All those disclaimers aside, it is useful for a quick dump of text
from a simple PDF file.
=head1 LICENSE
Same as L<CAM::PDF>
( run in 1.166 second using v1.01-cache-2.11-cpan-748bfb374f4 )