IO-Compress

 view release on metacpan or  search on metacpan

lib/IO/Uncompress/Unzip.pm  view on Meta::CPAN

{
    my $buffer = shift ;
    return 0 if length $buffer < 4 ;
    my $sig = unpack("V", $buffer) ;
    return $sig == ZIP_LOCAL_HDR_SIG ;
}


sub _readFullZipHeader($)
{
    my ($self) = @_ ;
    my $magic = '' ;

    $self->smartReadExact(\$magic, 4);

    *$self->{HeaderPending} = $magic ;

    return $self->HeaderError("Minimum header size is " .
                              30 . " bytes")
        if length $magic != 4 ;


    return $self->HeaderError("Bad Magic")
        if ! _isZipMagic($magic) ;

    my $status = $self->_readZipHeader($magic);
    delete *$self->{Transparent} if ! defined $status ;
    return $status ;
}

sub _readZipHeader($)
{
    my ($self, $magic) = @_ ;
    my ($HeaderCRC) ;
    my ($buffer) = '' ;

    $self->smartReadExact(\$buffer, 30 - 4)
        or return $self->HeaderError("Minimum header size is " .
                                     30 . " bytes") ;

    my $keep = $magic . $buffer ;
    *$self->{HeaderPending} = $keep ;

    my $extractVersion     = unpack ("v", substr($buffer, 4-4,  2));
    my $gpFlag             = unpack ("v", substr($buffer, 6-4,  2));
    my $compressedMethod   = unpack ("v", substr($buffer, 8-4,  2));
    my $lastModTime        = unpack ("V", substr($buffer, 10-4, 4));
    my $crc32              = unpack ("V", substr($buffer, 14-4, 4));
    my $compressedLength   = U64::newUnpack_V32 substr($buffer, 18-4, 4);
    my $uncompressedLength = U64::newUnpack_V32 substr($buffer, 22-4, 4);
    my $filename_length    = unpack ("v", substr($buffer, 26-4, 2));
    my $extra_length       = unpack ("v", substr($buffer, 28-4, 2));

    my $filename;
    my $extraField;
    my @EXTRA = ();

    # Some programs (some versions of LibreOffice) mark entries as streamed, but still fill out
    # compressedLength/uncompressedLength & crc32 in the local file header.
    # The expected data descriptor is not populated.
    # So only assume streaming if the Streaming bit is set AND the compressed length is zero
    my $streamingMode = (($gpFlag & ZIP_GP_FLAG_STREAMING_MASK)  && $crc32 == 0) ? 1 : 0 ;

    my $efs_flag = ($gpFlag & ZIP_GP_FLAG_LANGUAGE_ENCODING) ? 1 : 0;

    return $self->HeaderError("Encrypted content not supported")
        if $gpFlag & (ZIP_GP_FLAG_ENCRYPTED_MASK|ZIP_GP_FLAG_STRONG_ENCRYPTED_MASK);

    return $self->HeaderError("Patch content not supported")
        if $gpFlag & ZIP_GP_FLAG_PATCHED_MASK;

    *$self->{ZipData}{Streaming} = $streamingMode;


    if ($filename_length)
    {
        $self->smartReadExact(\$filename, $filename_length)
            or return $self->TruncatedHeader("Filename");

        if (*$self->{UnzipData}{efs} && $efs_flag && $] >= 5.008004)
        {
            require Encode;
            eval { $filename = Encode::decode_utf8($filename, 1) }
                or Carp::croak "Zip Filename not UTF-8" ;
        }

        $keep .= $filename ;
    }

    my $zip64 = 0 ;

    if ($extra_length)
    {
        $self->smartReadExact(\$extraField, $extra_length)
            or return $self->TruncatedHeader("Extra Field");

        my $bad = IO::Compress::Zlib::Extra::parseRawExtra($extraField,
                                                \@EXTRA, 1, 0);
        return $self->HeaderError($bad)
            if defined $bad;

        $keep .= $extraField ;

        my %Extra ;
        for (@EXTRA)
        {
            $Extra{$_->[0]} = \$_->[1];
        }

        if (defined $Extra{ZIP_EXTRA_ID_ZIP64()})
        {
            $zip64 = 1 ;

            my $buff = ${ $Extra{ZIP_EXTRA_ID_ZIP64()} };

            # This code assumes that all the fields in the Zip64
            # extra field aren't necessarily present. The spec says that
            # they only exist if the equivalent local headers are -1.

            if (! $streamingMode) {
                my $offset = 0 ;

                if (U64::full32 $uncompressedLength->get32bit() ) {
                    $uncompressedLength
                            = U64::newUnpack_V64 substr($buff, 0, 8);

                    $offset += 8 ;
                }

                if (U64::full32 $compressedLength->get32bit() ) {

                    $compressedLength
                        = U64::newUnpack_V64 substr($buff, $offset, 8);

                    $offset += 8 ;
                }
           }
        }
    }

    *$self->{ZipData}{Zip64} = $zip64;

    if (! $streamingMode) {
        *$self->{ZipData}{Streaming} = 0;
        *$self->{ZipData}{Crc32} = $crc32;
        *$self->{ZipData}{CompressedLen} = $compressedLength;
        *$self->{ZipData}{UnCompressedLen} = $uncompressedLength;
        *$self->{CompressedInputLengthRemaining} =
            *$self->{CompressedInputLength} = $compressedLength->get64bit();
    }

    *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(undef);
    *$self->{ZipData}{Method} = $compressedMethod;
    if ($compressedMethod == ZIP_CM_DEFLATE)
    {
        *$self->{Type} = 'zip-deflate';
        my $obj = IO::Uncompress::Adapter::Inflate::mkUncompObject(1,0,0);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_BZIP2)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::Bunzip2::VERSION ;

        *$self->{Type} = 'zip-bzip2';

        my $obj = IO::Uncompress::Adapter::Bunzip2::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_XZ)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnXz::VERSION ;

        *$self->{Type} = 'zip-xz';

        my $obj = IO::Uncompress::Adapter::UnXz::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_ZSTD)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnZstd::VERSION ;

        *$self->{Type} = 'zip-zstd';

        my $obj = IO::Uncompress::Adapter::UnZstd::mkUncompObject();

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_LZMA)
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod")
            if ! defined $IO::Uncompress::Adapter::UnLzma::VERSION ;

        *$self->{Type} = 'zip-lzma';
        my $LzmaHeader;
        $self->smartReadExact(\$LzmaHeader, 4)
                or return $self->saveErrorString(undef, "Truncated file");
        my ($verHi, $verLo)   = unpack ("CC", substr($LzmaHeader, 0, 2));
        my $LzmaPropertiesSize   = unpack ("v", substr($LzmaHeader, 2, 2));


        my $LzmaPropertyData;
        $self->smartReadExact(\$LzmaPropertyData, $LzmaPropertiesSize)
                or return $self->saveErrorString(undef, "Truncated file");

        if (! $streamingMode) {
            *$self->{ZipData}{CompressedLen}->subtract(4 + $LzmaPropertiesSize) ;
            *$self->{CompressedInputLengthRemaining} =
                *$self->{CompressedInputLength} = *$self->{ZipData}{CompressedLen}->get64bit();
        }

        my $obj =
            IO::Uncompress::Adapter::UnLzma::mkUncompZipObject($LzmaPropertyData);

        *$self->{Uncomp} = $obj;
    }
    elsif ($compressedMethod == ZIP_CM_STORE)
    {
        *$self->{Type} = 'zip-stored';

        my $obj =
        IO::Uncompress::Adapter::Identity::mkUncompObject($streamingMode,
                                                          $zip64);

        *$self->{Uncomp} = $obj;
    }
    else
    {
        return $self->HeaderError("Unsupported Compression format $compressedMethod");
    }

    return {
        'Type'               => 'zip',
        'FingerprintLength'  => 4,
        #'HeaderLength'       => $compressedMethod == 8 ? length $keep : 0,
        'HeaderLength'       => length $keep,
        'Zip64'              => $zip64,
        'TrailerLength'      => ! $streamingMode ? 0 : $zip64 ? 24 : 16,
        'Header'             => $keep,
        'CompressedLength'   => $compressedLength ,
        'UncompressedLength' => $uncompressedLength ,
        'CRC32'              => $crc32 ,
        'Name'               => $filename,
        'efs'                => $efs_flag, # language encoding flag
        'Time'               => _dosToUnixTime($lastModTime),
        'Stream'             => $streamingMode,

        'MethodID'           => $compressedMethod,
        'MethodName'         => $MethodNames{$compressedMethod} || 'Unknown',

#        'TextFlag'      => $flag & GZIP_FLG_FTEXT ? 1 : 0,
#        'HeaderCRCFlag' => $flag & GZIP_FLG_FHCRC ? 1 : 0,
#        'NameFlag'      => $flag & GZIP_FLG_FNAME ? 1 : 0,
#        'CommentFlag'   => $flag & GZIP_FLG_FCOMMENT ? 1 : 0,
#        'ExtraFlag'     => $flag & GZIP_FLG_FEXTRA ? 1 : 0,
#        'Comment'       => $comment,
#        'OsID'          => $os,
#        'OsName'        => defined $GZIP_OS_Names{$os}
#                                 ? $GZIP_OS_Names{$os} : "Unknown",
#        'HeaderCRC'     => $HeaderCRC,
#        'Flags'         => $flag,
#        'ExtraFlags'    => $xfl,
        'ExtraFieldRaw' => $extraField,
        'ExtraField'    => [ @EXTRA ],


      }
}

sub filterUncompressed
{
    my $self = shift ;

    if (*$self->{ZipData}{Method} == ZIP_CM_DEFLATE) {
        *$self->{ZipData}{CRC32} = *$self->{Uncomp}->crc32() ;
    }
    else {
        *$self->{ZipData}{CRC32} = Compress::Raw::Zlib::crc32(${$_[0]}, *$self->{ZipData}{CRC32}, $_[1]);
    }
}


# from Archive::Zip & info-zip
sub _dosToUnixTime
{
    # Returns zero when $dt is already zero or it doesn't expand to a value that Time::Local::timelocal()
    # can handle.

	my $dt = shift;
    # warn "_dosToUnixTime dt=[$dt]\n";

    # some zip files don't populate the datetime field at all
    return 0 if ! $dt;

	my $year = ( ( $dt >> 25 ) & 0x7f ) + 80;
	my $mon  = ( ( $dt >> 21 ) & 0x0f ) - 1;
	my $mday = ( ( $dt >> 16 ) & 0x1f );

	my $hour = ( ( $dt >> 11 ) & 0x1f );
	my $min  = ( ( $dt >> 5 ) & 0x3f );
	my $sec  = ( ( $dt << 1 ) & 0x3e );

    use Time::Local ;

    my $time_t ;
    # wrap in an eval to catch out of range errors

lib/IO/Uncompress/Unzip.pm  view on Meta::CPAN


__END__


=head1 NAME

IO::Uncompress::Unzip - Read zip files/buffers

=head1 SYNOPSIS

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    my $status = unzip $input => $output [,OPTS]
        or die "unzip failed: $UnzipError\n";

    my $z = IO::Uncompress::Unzip->new( $input [OPTS] )
        or die "unzip failed: $UnzipError\n";

    $status = $z->read($buffer)
    $status = $z->read($buffer, $length)
    $status = $z->read($buffer, $length, $offset)
    $line = $z->getline()
    $char = $z->getc()
    $char = $z->ungetc()
    $char = $z->opened()

    $status = $z->inflateSync()

    $data = $z->trailingData()
    $status = $z->nextStream()
    $data = $z->getHeaderInfo()
    $z->tell()
    $z->seek($position, $whence)
    $z->binmode()
    $z->fileno()
    $z->eof()
    $z->close()

    $UnzipError ;

    # IO::File mode

    <$z>
    read($z, $buffer);
    read($z, $buffer, $length);
    read($z, $buffer, $length, $offset);
    tell($z)
    seek($z, $position, $whence)
    binmode($z)
    fileno($z)
    eof($z)
    close($z)

=head1 DESCRIPTION

This module provides a Perl interface that allows the reading of
zlib files/buffers.

For writing zip files/buffers, see the companion module IO::Compress::Zip.

The primary purpose of this module is to provide I<streaming> read access to
zip files and buffers.

At present the following compression methods are supported by IO::Uncompress::Unzip

=over 5

=item Store (0)

=item Deflate (8)

=item Bzip2 (12)

To read Bzip2 content, the module C<IO::Uncompress::Bunzip2> must
be installed.

=item Lzma (14)

To read LZMA content, the module C<IO::Uncompress::UnLzma> must
be installed.

=item Xz (95)

To read Xz content, the module C<IO::Uncompress::UnXz> must
be installed.

=item Zstandard (93)

To read Zstandard content, the module C<IO::Uncompress::UnZstd> must
be installed.

=back

=head1 Functional Interface

A top-level function, C<unzip>, is provided to carry out
"one-shot" uncompression between buffers and/or files. For finer
control over the uncompression process, see the L</"OO Interface">
section.

    use IO::Uncompress::Unzip qw(unzip $UnzipError) ;

    unzip $input_filename_or_reference => $output_filename_or_reference [,OPTS]
        or die "unzip failed: $UnzipError\n";

The functional interface needs Perl5.005 or better.

=head2 unzip $input_filename_or_reference => $output_filename_or_reference [, OPTS]

C<unzip> expects at least two parameters,
C<$input_filename_or_reference> and C<$output_filename_or_reference>
and zero or more optional parameters (see L</Optional Parameters>)

=head3 The C<$input_filename_or_reference> parameter

The parameter, C<$input_filename_or_reference>, is used to define the
source of the compressed data.

It can take one of the following forms:

=over 5



( run in 0.932 second using v1.01-cache-2.11-cpan-39bf76dae61 )