App-PDFUtils

 view release on metacpan or  search on metacpan

lib/App/PDFUtils.pm  view on Meta::CPAN

    return_output_file => {
        summary => 'Return the path of output file instead',
        schema => 'bool*',
        description => <<'MARKDOWN',

This is useful when you do not specify an output file but do not want to show
the converted document to stdout, but instead want to get the path to a
temporary output file.

MARKDOWN
    },
);

# Function metadata for add_pdf_password(): summary, long description, the
# accepted arguments, the external program it depends on (qpdf) and a link to
# the counterpart utility. Presumably consumed by a Rinci/Perinci-style
# command-line framework -- confirm against the top of this file.
$SPEC{add_pdf_password} = {
    v => 1.1,
    summary => 'Password-protect PDF files',
    description => <<'MARKDOWN',

This program is a wrapper for <prog:qpdf> to password-protect PDF files
(in-place). This is the counterpart for <prog:remove-pdf-password>. Why use this
wrapper instead of **qpdf** directly? This wrapper offers configuration file
support, where you can put the password(s) you want to use there. The wrapper
also offers multiple file support and additional options, e.g. whether to create
backup.

MARKDOWN
    args => {
        # Shared argument spec defined elsewhere in this file; the function body
        # reads $args{files} as an array reference of paths.
        %argspec0_files,
        # Required, non-empty password; the function passes it to qpdf twice.
        password => {
            schema => ['str*', min_len=>1],
            req => 1,
        },
        # On by default: the function renames the original to "<file>~" before
        # putting the encrypted file in its place.
        backup => {
            summary => 'Whether to backup the original file to ORIG~',
            schema => 'bool*',
            default => 1,
        },
        # XXX key_length (see qpdf, but when 256 can't be opened by evince)
        # XXX other options (see qpdf)
    },
    # External program that must be available for this function to work.
    deps => {
        prog => 'qpdf',
    },
    links => [
        {url => 'prog:remove-pdf-password'},
    ],
};
# Password-protect each PDF listed in $args{files}, in place, by running qpdf.
#
# Arguments (named, passed as a flat hash):
#   files    - arrayref of paths; each must be an existing regular file whose
#              name ends in ".pdf" (case-insensitive)
#   password - string handed to qpdf as both password arguments of --encrypt
#   backup   - when true, the original is renamed to "<file>~" before the
#              encrypted file takes its place
#
# Returns whatever envresmulti()->as_struct yields after one result has been
# recorded per file: 200 (OK), 404 (file missing), 412 (no temporary name could
# be derived, or qpdf reported "invalid password", i.e. the file is already
# encrypted), 500 (qpdf failed for another reason, or the final rename failed).
sub add_pdf_password {
    require IPC::System::Options;

    my %args = @_;

    my $envres = envresmulti();

  FILE:
    for my $f (@{ $args{files} }) {
        unless (-f $f) {
            $envres->add_result(404, "File not found", {item_id=>$f});
            next FILE;
        }
        # XXX test that tempfile doesn't yet exist. but actually we can't avoid
        # race condition because qpdf is another process
        (my $tempf = $f) =~ s/\.pdf$/".tmp_" . int(rand()*900_000 + 100_000) . ".pdf"/ei
            or do {
                # item_id is included so this failure can be attributed to the
                # file, like every other per-file result.
                $envres->add_result(412, "Cannot set temporary name for $f", {item_id=>$f});
                next FILE;
            };

        my ($stdout, $stderr);
        IPC::System::Options::system(
            {log => 1, capture_stdout => \$stdout, capture_stderr => \$stderr},
            "qpdf", "--encrypt", $args{password}, $args{password}, 128, "--", $f, $tempf);
        my $err = $?;
        # Guard against an unset capture buffer so the matches below never
        # operate on undef.
        $stderr = '' unless defined $stderr;
        if ($err) {
            # Do not leave a partial/unused output file next to the original.
            unlink $tempf;
            if ($stderr =~ /: invalid password$/) {
                $envres->add_result(412, "File already encrypted", {item_id=>$f});
            } else {
                $stderr =~ s/\R//g;
                # Never report a failure with an empty message.
                my $msg = length($stderr) ? $stderr : "qpdf failed (wait status $err)";
                $envres->add_result(500, $msg, {item_id=>$f});
            }
            next FILE;
        }

        # Optionally keep the original as "<file>~". A failed backup is only
        # warned about; the replacement below still proceeds.
        my $backed_up = 0;
        if ($args{backup}) {
            if (rename $f, "$f~") {
                $backed_up = 1;
            } else {
                warn "Can't backup original '$f' to '$f~': $!, skipped backup\n";
            }
        }

        unless (rename $tempf, $f) {
            my $rename_err = $!; # save before the restore attempt clobbers $!
            # The original was moved away above; put it back so the caller is
            # not left without a file at the original path.
            rename "$f~", $f if $backed_up;
            $envres->add_result(500, "Can't rename $tempf to $f: $rename_err", {item_id=>$f});
            next FILE;
        }
        $envres->add_result(200, "OK", {item_id=>$f});
    }

    $envres->as_struct;
}

# Function metadata for remove_pdf_password(): summary, long description
# (including how to list candidate passwords in a configuration file), the
# accepted arguments, the external program it depends on (qpdf) and a link to
# the counterpart utility.
$SPEC{remove_pdf_password} = {
    v => 1.1,
    summary => 'Remove password from PDF files',
    description => <<'MARKDOWN',

This program is a wrapper for <prog:qpdf> to remove passwords from PDF files
(in-place).

The motivation for this wrapper is the increasing occurrence of financial
institutions sending financial statements or documents in the format of
password-protected PDF file. This is annoying when we want to archive the file
or use it in an organization because we have to remember different passwords for
different financial institutions and re-enter the password every time we want to
use the file. (The banks could've sent the PDF in a password-protected .zip, or
use PGP-encrypted email, but I digress.)

Compared to using **qpdf** directly, this wrapper offers some additional
features/options and convenience, for example: multiple file support, multiple
password matching attempts, configuration file, option whether you want backup,
etc.

You can provide the passwords to be tried in a configuration file,
`~/remove-pdf-password.conf`, e.g.:

    passwords = pass1
    passwords = pass2
    passwords = pass3

or:

    passwords = ["pass1", "pass2", "pass3"]

MARKDOWN
    args => {
        # Shared argument spec defined elsewhere in this file; the function body
        # reads $args{files} as an array reference of paths.
        %argspec0_files,
        # Candidate passwords; the function tries them one by one, in the given
        # order, and stops at the first one qpdf accepts.
        passwords => {
            summary => 'Passwords to try, in order',
            schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
        },
        # On by default: the function renames the original to "<file>~" before
        # putting the decrypted file in its place.
        backup => {
            summary => 'Whether to backup the original file to ORIG~',
            schema => 'bool*',
            default => 1,
        },
    },
    # External program that must be available for this function to work.
    deps => {
        prog => 'qpdf',
    },
    links => [
        {url => 'prog:add-pdf-password'},
    ],
};
# Remove the password from each PDF listed in $args{files}, in place, by
# running "qpdf --decrypt" with each candidate password until one is accepted.
#
# Arguments (named, passed as a flat hash):
#   files     - arrayref of paths; each must be an existing regular file whose
#               name ends in ".pdf" (case-insensitive)
#   passwords - arrayref of candidate passwords, tried in order
#   backup    - when true, the original is renamed to "<file>~" before the
#               decrypted file takes its place
#
# Returns whatever envresmulti()->as_struct yields after one result has been
# recorded per file: 200 (OK), 404 (file missing), 412 (no temporary name could
# be derived, or no password produced a decrypted file), 500 (qpdf failed for a
# reason other than an invalid password, or the final rename failed).
sub remove_pdf_password {
    require IPC::System::Options;

    my %args = @_;

    my $envres = envresmulti();

  FILE:
    for my $f (@{ $args{files} }) {
        unless (-f $f) {
            $envres->add_result(404, "File not found", {item_id=>$f});
            next FILE;
        }
        # XXX test that tempfile doesn't yet exist. but actually we can't avoid
        # race condition because qpdf is another process
        (my $tempf = $f) =~ s/\.pdf$/".tmp_" . int(rand()*900_000 + 100_000) . ".pdf"/ei
            or do {
                # item_id is included so this failure can be attributed to the
                # file, like every other per-file result.
                $envres->add_result(412, "Cannot set temporary name for $f", {item_id=>$f});
                next FILE;
            };

        # Set only when a qpdf run actually succeeded. Relying on the mere
        # existence of $tempf would mistake a stale file with the same name
        # (e.g. when the password list is empty) for a decrypted result.
        my $decrypted = 0;
      PASSWORD:
        for my $p (@{ $args{passwords} }) {
            my ($stdout, $stderr);
            IPC::System::Options::system(
                {log => 1, fail_log_level => 'info', capture_stdout => \$stdout, capture_stderr => \$stderr},
                "qpdf", "--password=$p", "--decrypt", $f, $tempf);
            my $err = $?;
            # Guard against an unset capture buffer so the matches below never
            # operate on undef.
            $stderr = '' unless defined $stderr;
            if ($err && $stderr =~ /: invalid password$/) {
                unlink $tempf; # just to make sure
                next PASSWORD;
            } elsif ($err) {
                # NOTE(review): qpdf is documented to exit with status 3 when it
                # succeeds with warnings; that case is still reported as a
                # failure here, as before -- confirm whether it should be.
                unlink $tempf; # do not leave partial output behind
                $stderr =~ s/\R//g;
                # Never report a failure with an empty message.
                my $msg = length($stderr) ? $stderr : "qpdf failed (wait status $err)";
                $envres->add_result(500, $msg, {item_id=>$f});
                next FILE;
            }
            $decrypted = 1;
            last PASSWORD;
        }
        unless ($decrypted && -f $tempf) {
            $envres->add_result(412, "No passwords can be successfully used on $f", {item_id=>$f});
            next FILE;
        }

        # Optionally keep the original as "<file>~". A failed backup is only
        # warned about; the replacement below still proceeds.
        my $backed_up = 0;
        if ($args{backup}) {
            if (rename $f, "$f~") {
                $backed_up = 1;
            } else {
                warn "Can't backup original '$f' to '$f~': $!, skipped backup\n";
            }
        }

        unless (rename $tempf, $f) {
            my $rename_err = $!; # save before the restore attempt clobbers $!
            # The original was moved away above; put it back so the caller is
            # not left without a file at the original path.
            rename "$f~", $f if $backed_up;
            $envres->add_result(500, "Can't rename $tempf to $f: $rename_err", {item_id=>$f});
            next FILE;
        }
        $envres->add_result(200, "OK", {item_id=>$f});
    }

    $envres->as_struct;
}

$SPEC{pdf_has_password} = {
    v => 1.1,
    summary => 'Check if PDF file has password',
    description => <<'MARKDOWN',

This is a wrapper for `qpdf --check`. The wrapper offers additional options like
`--quiet``.

lib/App/PDFUtils.pm  view on Meta::CPAN

    % gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen -dNOPAUSE -dQUIET -dBATCH -sOutputFile=output.pdf input.pdf

This wrapper offers support for multiple files and automatically naming output
`INPUT.compressed.pdf` by default.

MARKDOWN
    args => {
        %argspec0_files,
        %argspecopt_overwrite,
        setting => {
            schema => ['str*', {
                in => [
                    'screen',
                    'ebook',
                    'prepress',
                    'printer',
                    'default',
                ],
                'x.in.summaries' => [
                    'Has a lower quality and smaller size (72 dpi)',
                    'Has a better quality, but has a slightly larger size (150 dpi)',
                    'Output is of a higher size and quality (300 dpi)',
                    'Output is of a printer type quality (300 dpi)',
                    'Selects the output which is useful for multiple purposes, can cause large PDFS',
                ],
            }],
            default => 'ebook',
            cmdline_aliases => {s=>{}},
        },
    },
    examples => [
        {
            summary => 'Compress foo.pdf into foo.compressed.pdf using default setting (ebook - 150dpi)',
            test => 0,
            src => '[[prog]] foo.pdf',
            src_plang => 'bash',
            'x.doc.show_result' => 0,
        },
        {
            summary => 'Compress two files with more extreme compression (screen - 72dpi), overwrite existing output',
            test => 0,
            src => '[[prog]] -O -s screen foo.pdf bar.pdf',
            src_plang => 'bash',
            'x.doc.show_result' => 0,
        },
    ],
    deps => {
        prog => 'gs',
    },
};
# Write a compressed copy of each PDF listed in $args{files} by running
# Ghostscript (gs) with the pdfwrite device.
#
# Arguments (named, passed as a flat hash):
#   files     - arrayref of paths; each must be an existing regular file whose
#               name ends in ".pdf" (case-insensitive)
#   overwrite - when true, an already existing output file is replaced;
#               otherwise that input is rejected with status 412
#   setting   - value used for gs's -dPDFSETTINGS=/... option; falls back to
#               'ebook' when not supplied
#
# The output of "NAME.pdf" is "NAME.compressed.pdf" (the extension keeps its
# original letter case).
#
# Returns whatever envresmulti()->as_struct yields after one result has been
# recorded per file: 200 (OK), 404 (file missing), 412 (output exists and
# overwrite is off), 500 (output name could not be derived, or gs failed).
sub compress_pdf {
    require IPC::System::Options;

    my %args = @_;

    # The schema default is only filled in by an argument-processing wrapper;
    # apply it here too so a direct call without 'setting' does not produce an
    # empty "-dPDFSETTINGS=/" (plus an "uninitialized" warning).
    my $setting = $args{setting} // 'ebook';

    my $envres = envresmulti();

  FILE:
    for my $f (@{ $args{files} }) {
        unless (-f $f) {
            $envres->add_result(404, "File not found", {item_id=>$f});
            next FILE;
        }
        my $outputf = $f;
        $outputf =~ s/\.(pdf)\z/.compressed.$1/i or do {
            $envres->add_result(500, "Cannot determine output filename", {item_id=>$f});
            next FILE;
        };
        if ((-f $outputf) && !$args{overwrite}) {
            $envres->add_result(412, "Won't overwrite existing output $outputf", {item_id=>$f});
            next FILE;
        }

        IPC::System::Options::system(
            {log => 1},
            "gs", "-sDEVICE=pdfwrite", "-dCompatibilityLevel=1.4", "-dPDFSETTINGS=/$setting", "-dNOPAUSE", "-dQUIET", "-dBATCH", "-sOutputFile=$outputf", $f,
        );
        if ($?) {
            $envres->add_result(500, "Failed", {item_id=>$f});
        } else {
            $envres->add_result(200, "OK", {item_id=>$f});
        }
    }

    $envres->as_struct;
}

1;
# ABSTRACT: Command-line utilities related to PDF files

__END__

=pod

=encoding UTF-8

=head1 NAME

App::PDFUtils - Command-line utilities related to PDF files

=head1 VERSION

This document describes version 0.017 of App::PDFUtils (from Perl distribution App-PDFUtils), released on 2026-02-02.

=head1 SYNOPSIS

=head1 DESCRIPTION

This distribution provides the following command-line utilities related to PDF
files:

=over

=item 1. L<add-pdf-password>

=item 2. L<compress-pdf>

=item 3. L<grep-from-pdf>

=item 4. L<less-pdf-text>



( run in 1.925 second using v1.01-cache-2.11-cpan-39bf76dae61 )