CPAN-Checksums

 view release on metacpan or  search on metacpan

lib/CPAN/Checksums.pm  view on Meta::CPAN

# -*- cperl-indent-level: 2  -*-
package CPAN::Checksums;

use strict;
use vars qw(
            $CAUTION
            $DIRNAME
            $IGNORE_MATCH
            $MIN_MTIME_CHECKSUMS
            $SIGNING_KEY
            $SIGNING_PROGRAM
            $TRY_SHORTNAME
            $VERSION
            @EXPORT_OK
            @ISA
           );

require Exporter;

@ISA = qw(Exporter);
@EXPORT_OK = qw(updatedir);
$VERSION = "2.14";
$VERSION =~ s/_//;
$CAUTION ||= 0;
$TRY_SHORTNAME ||= 0;
$SIGNING_PROGRAM ||= 'gpg --clearsign --default-key ';
$SIGNING_KEY ||= '';
$MIN_MTIME_CHECKSUMS ||= 0;
$IGNORE_MATCH = qr{(?i-xsm:readme$)};

use DirHandle ();
use IO::File ();
use Digest::MD5 ();
use Compress::Bzip2();
use Compress::Zlib ();
use File::Spec ();
use File::Temp;
use Data::Dumper ();
use Data::Compare ();
use Digest::SHA ();

sub _dir_to_dref {
  my($dirname,$old_dref,$root) = @_;
  my $cpan_path = File::Spec->abs2rel( $dirname, $root ) ;
  my($dref) = {};
  my($dh)= DirHandle->new;
  my($fh) = new IO::File;
  $dh->open($dirname) or die "Couldn't opendir $dirname\: $!";
  my(%shortnameseen);
 DIRENT: for my $de ($dh->read) {
    next DIRENT if $de =~ /^\./;
    next DIRENT if substr($de,0,9) eq "CHECKSUMS";
    next DIRENT if $IGNORE_MATCH && $de =~ $IGNORE_MATCH;

    my $abs = File::Spec->catfile($dirname,$de);

    #
    # SHORTNAME offers an 8.3 name, probably not needed but it was
    # always there,,,
    #
    if ($TRY_SHORTNAME) {
      my $shortname = lc $de;
      $shortname =~ s/\.tar[._-]gz$/\.tgz/;
      my $suffix;
      ($suffix = $shortname) =~ s/.*\.//;
      substr($suffix,3) = "" if length($suffix) > 3;
      my @p;
      if ($shortname =~ /\-/) {
        @p = $shortname =~ /(.{1,16})-.*?([\d\.]{2,8})/;
      } else {
        @p = $shortname =~ /(.{1,8}).*?([\d\.]{2,8})/;
      }
      $p[0] ||= lc $de;
      $p[0] =~ s/[^a-z0-9]//g;
      $p[1] ||= 0;
      $p[1] =~ s/\D//g;
      my $counter = 7;
      while (length($p[0]) + length($p[1]) > 8) {
        substr($p[0], $counter) = "" if length($p[0]) > $counter;
        substr($p[1], $counter) = "" if length($p[1]) > $counter--;
      }
      my $dot = $suffix ? "." : "";
      $shortname = "$p[0]$p[1]$dot$suffix";
      while (exists $shortnameseen{$shortname}) {
        my($modi) = $shortname =~ /([a-z\d]+)/;
        $modi++;
        $shortname = "$modi$dot$suffix";
        if (++$counter > 1000){ # avoid endless loops and accept the buggy choice
          warn "Warning: long loop on shortname[$shortname]de[$de]";
          last;
        }
      }
      $dref->{$de}->{shortname} = $shortname;
      $shortnameseen{$shortname} = undef; # for exists check good enough
    }

    #
    # STAT facts
    #
    if (-l File::Spec->catdir($dirname,$de)){
      # Symlinks are a mess on a replicated, database driven system,
      # but as they are not forbidden, we cannot ignore them. We do
      # have a directory with nothing but a symlink in it. When we
      # ignored the symlink, we did not write a CHECKSUMS file and
      # CPAN.pm issued lots of warnings:-(
      $dref->{$de}{issymlink} = 1;
    }
    if (-d File::Spec->catdir($dirname,$de)){
      $dref->{$de}{isdir} = 1;
    } else {
      my @stat = stat $abs or next DIRENT;
      $dref->{$de}{size} = $stat[7];
      my(@gmtime) = gmtime $stat[9];
      $gmtime[4]++;
      $gmtime[5]+=1900;
      $dref->{$de}{mtime} = sprintf "%04d-%02d-%02d", @gmtime[5,4,3];
      _add_digests($de,$dref,"Digest::SHA",[256],"sha256",$abs,$old_dref);
      my $can_reuse_old_md5 = 1;
    COMPARE: for my $param (qw(size mtime sha256)) {
        if (!exists $old_dref->{$de}{$param} ||
            $dref->{$de}{$param} ne $old_dref->{$de}{$param}) {
          $can_reuse_old_md5 = 0;
          last COMPARE;
        }
      }
      if ($can_reuse_old_md5
          and $de =~ /\.(gz|tgz|bz2|tbz)$/
          and exists $old_dref->{$de}{md5}
          and !exists $old_dref->{$de}{"md5-ungz"}
          and !exists $old_dref->{$de}{"md5-unbz2"}
         ) {
        $can_reuse_old_md5 = 0;
      }
      if ( $can_reuse_old_md5 ) {
      MD5KEY: for my $param (qw(md5 md5-ungz md5-unbz2)) {
          next MD5KEY unless exists $old_dref->{$de}{$param};
          $dref->{$de}{$param} = $old_dref->{$de}{$param};
        }
      } else {
        _add_digests($de,$dref,"Digest::MD5",[],"md5",$abs,$old_dref);
      }

    } # ! -d
    $dref->{$de}{cpan_path} = $cpan_path;

  }
  $dh->close;
  $dref;
}

sub _read_old_ddump {
  my($ckfn) = @_;
  my $is_signed = 0;
  my($fh) = new IO::File;
  my $old_ddump = "";
  if ($fh->open($ckfn)) {
    local $/ = "\n";
    while (<$fh>) {
      next if /^\#/;
      $is_signed = 1 if /SIGNED MESSAGE/;
      $old_ddump .= $_;
    }
    close $fh;
  }
  return($old_ddump,$is_signed);
}

sub updatedir ($;$) {
  my($dirname, $root) = @_;
  my $ckfn = File::Spec->catfile($dirname, "CHECKSUMS"); # checksum-file-name
  my($old_ddump,$is_signed) = _read_old_ddump($ckfn);
  my($old_dref) = makehashref($old_ddump);
  my $dref = _dir_to_dref($dirname,$old_dref,$root);
  local $Data::Dumper::Indent = 1;
  local $Data::Dumper::Quotekeys = 1;
  local $Data::Dumper::Sortkeys = 1;
  my $ddump = Data::Dumper->new([$dref],["cksum"])->Dump;
  my @ckfnstat = stat $ckfn;
  if ($old_ddump) {
    local $DIRNAME = $dirname;
    if ( !!$SIGNING_KEY == !!$is_signed ) { # either both or neither
      if (!$MIN_MTIME_CHECKSUMS || $ckfnstat[9] > $MIN_MTIME_CHECKSUMS ) {
        # recent enough
        return 1 if $old_ddump eq $ddump;
        return 1 if ckcmp($old_dref,$dref);
      }
    }
    if ($CAUTION) {
      my $report = investigate($old_dref,$dref);
      warn $report if $report;
    }
  }
  my $ft = File::Temp->new(
                           DIR => $dirname,
                           TEMPLATE => "CHECKSUMS.XXXX",
                           CLEANUP => 0,
                          ) or die;
  my $tckfn = $ft->filename;
  close $ft;
  my($fh) = new IO::File;
  open $fh, ">$tckfn\0" or die "Couldn't open >$tckfn\: $!";

  local $\;
  if ($SIGNING_KEY) {

lib/CPAN/Checksums.pm  view on Meta::CPAN

      for my $diff (qw/md5 sha256 size md5-ungz sha256-ungz mtime/) {
        next unless exists $old->{$dist}{$diff} &&
            exists $new->{$dist}{$diff};
        next if $old->{$dist}{$diff} eq $new->{$dist}{$diff};
        $complain .=
            scalar gmtime().
                " GMT:\ndiffering old/new version of same file $dist:\n"
                    unless $headersaid++;
        $complain .=
            qq{\t$diff "$old->{$dist}{$diff}" -> "$new->{$dist}{$diff}"\n}; #};
      }
    }
  }
  $complain;
}

sub makehashref ($) {
  local($_) = shift;
  unless (ref $_ eq "HASH") {
    require Safe;
    my($comp) = Safe->new("CPAN::Checksums::reval");
    my $cksum; # used by Data::Dumper
    $_ = $comp->reval($_) || {};
    die "CPAN::Checksums: Caught error[$@] while checking $DIRNAME" if $@;
  }
  $_;
}

1;

__END__

=head1 NAME

CPAN::Checksums - Write a C<CHECKSUMS> file for a directory as on CPAN

=head1 SYNOPSIS

  use CPAN::Checksums qw(updatedir);
  my $success = updatedir($directory, $root);

=head1 INCOMPATIBILITY ALERT

Since version 1.0 the generation of the attribute C<shortname> is
turned off by default. It was too slow and was not used as far as I
know, and above all, it could fail on large directories. The shortname
feature can still be turned on by setting the global variable
$TRY_SHORTNAME to a true value.

=head1 DESCRIPTION

=over 2

=item $success = updatedir($dir[, $root])

$dir is a directory. Updatedir() writes a C<CHECKSUMS> file into that
directory, unless a previously written C<CHECKSUMS> file is there that
is still valid. Returns 2 if a new C<CHECKSUMS> file has been written,
1 if a valid C<CHECKSUMS> file is already there, otherwise dies.

If $root is given, the hash entry with the key C<cpan_path> is
relative to this root directory.

Note: since version 2.0 updatedir on empty directories behaves just
the same. In older versions it silently did nothing.

=back

=head2 Global Variables in package CPAN::Checksums

=over

=item $IGNORE_MATCH

If the global variable $IGNORE_MATCH is set, then all files matching
this expression will be completely ignored and will not be included in
the CPAN C<CHECKSUMS> files. Per default this variable is set to

    qr{(?i-xsm:readme$)}

=item $CAUTION

Setting the global variable $CAUTION causes updatedir() to report
changes of files in the attributes C<size>, C<mtime>, C<md5>, or
C<md5-ungz> to STDERR.

=item $TRY_SHORTNAME

By setting the global variable $TRY_SHORTNAME to a true value, you can
tell updatedir() to include an attribute C<shortname> in the resulting
hash that is 8.3-compatible. Please note, that updatedir() in this
case may be slow and may even fail on large directories, because it
will always only try 1000 iterations to find a name that is not yet
taken and then give up.

=item $SIGNING_KEY

Setting the global variable $SIGNING_KEY makes the generated C<CHECKSUMS>
file to be clear-signed by the command specified in $SIGNING_PROGRAM
(defaults to C<gpg --clearsign --default-key >), passing the signing
key as an extra argument.  The resulting C<CHECKSUMS> file should look like:

    0&&<<''; # this PGP-signed message is also valid perl
    -----BEGIN PGP SIGNED MESSAGE-----
    Hash: SHA1

    # CHECKSUMS file written on ... by CPAN::Checksums (v...)
    $cksum = {
	...
    };

    __END__
    -----BEGIN PGP SIGNATURE-----
    ...
    -----END PGP SIGNATURE-----

note that the actual data remains intact, but two extra lines are
added to make it legal for both OpenPGP and perl syntax.

=item $MIN_MTIME_CHECKSUMS



( run in 1.641 second using v1.01-cache-2.11-cpan-39bf76dae61 )