Archive-Tar
view release on metacpan or search on metacpan
lib/Archive/Tar.pm view on Meta::CPAN
# straight from the tarball
if( not defined $alt and
not $INSECURE_EXTRACT_MODE
) {
### paths that leave the current directory are not allowed under
### strict mode, so only allow it if a user tells us to do this.
if( grep { $_ eq '..' } @dirs ) {
$self->_error(
q[Entry ']. $entry->full_path .q[' is attempting to leave ].
q[the current working directory. Not extracting under ].
q[SECURE EXTRACT MODE]
);
return;
}
### the archive may be asking us to extract into a symlink. This
### is not sane and a possible security issue, as outlined here:
### https://rt.cpan.org/Ticket/Display.html?id=30380
### https://bugzilla.redhat.com/show_bug.cgi?id=295021
### https://issues.rpath.com/browse/RPL-1716
my $full_path = $cwd;
for my $d ( @dirs ) {
$full_path = File::Spec->catdir( $full_path, $d );
### we've already checked this one, and it's safe. Move on.
next if ref $self and $self->{_link_cache}->{$full_path};
if( -l $full_path ) {
my $to = readlink $full_path;
my $diag = "symlinked directory ($full_path => $to)";
$self->_error(
q[Entry ']. $entry->full_path .q[' is attempting to ].
qq[extract to a $diag. This is considered a security ].
q[vulnerability and not allowed under SECURE EXTRACT ].
q[MODE]
);
return;
}
### XXX keep a cache if possible, so the stats become cheaper:
$self->{_link_cache}->{$full_path} = 1 if ref $self;
}
}
### '.' is the directory delimiter on VMS, which has to be escaped
### or changed to '_' on vms. vmsify is used, because older versions
### of vmspath do not handle this properly.
### Must not add a '/' to an empty directory though.
map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
my ($cwd_vol,$cwd_dir,$cwd_file)
= File::Spec->splitpath( $cwd );
my @cwd = File::Spec->splitdir( $cwd_dir );
push @cwd, $cwd_file if length $cwd_file;
### We need to pass '' as the last element to catpath. Craig Berry
### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
### The root problem is that splitpath on UNIX always returns the
### final path element as a file even if it is a directory, and of
### course there is no way it can know the difference without checking
### against the filesystem, which it is documented as not doing. When
### you turn around and call catpath, on VMS you have to know which bits
### are directory bits and which bits are file bits. In this case we
### know the result should be a directory. I had thought you could omit
### the file argument to catpath in such a case, but apparently on UNIX
### you can't.
$dir = File::Spec->catpath(
$cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
);
### catdir() returns undef if the path is longer than 255 chars on
### older VMS systems.
unless ( defined $dir ) {
$^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
return;
}
}
if( -e $dir && !-d _ ) {
$^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
return;
}
unless ( -d _ ) {
eval { File::Path::mkpath( $dir, 0, 0777 ) };
if( $@ ) {
my $fp = $entry->full_path;
$self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
return;
}
### XXX chown here? that might not be the same as in the archive
### as we're only chown'ing to the owner of the file we're extracting
### not to the owner of the directory itself, which may or may not
### be another entry in the archive
### Answer: no, gnu tar doesn't do it either, it'd be the wrong
### way to go.
#if( $CHOWN && CAN_CHOWN ) {
# chown $entry->uid, $entry->gid, $dir or
# $self->_error( qq[Could not set uid/gid on '$dir'] );
#}
}
### we're done if we just needed to create a dir ###
return 1 if $entry->is_dir;
my $full = File::Spec->catfile( $dir, $file );
if( $entry->is_unknown ) {
$self->_error( qq[Unknown file type for file '$full'] );
return;
}
### If a file system already contains a block device with the same name as
### the being extracted regular file, we would write the file's content
### to the block device. So remove the existing file (block device) now.
### If an archive contains multiple same-named entries, the last one
### should replace the previous ones. So remove the old file now.
### If the old entry is a symlink to a file outside of the CWD, the new
### entry would create a file there. This is CVE-2018-12015
### <https://rt.cpan.org/Ticket/Display.html?id=125523>.
if (-l $full || -e _) {
if (!unlink $full) {
$self->_error( qq[Could not remove old file '$full': $!] );
return;
}
}
if( length $entry->type && $entry->is_file ) {
my $fh = IO::File->new;
$fh->open( $full, '>' ) or (
$self->_error( qq[Could not open file '$full': $!] ),
return
);
if( $entry->size ) {
binmode $fh;
my $offset = 0;
my $content = $entry->get_content_by_ref();
while ($offset < $entry->size) {
my $written
= syswrite $fh, $$content, $EXTRACT_BLOCK_SIZE, $offset;
if (defined $written) {
$offset += $written;
} else {
$self->_error( qq[Could not write data to '$full': $!] );
return;
}
}
}
close $fh or (
$self->_error( qq[Could not close file '$full'] ),
return
);
} else {
$self->_make_special_file( $entry, $full ) or return;
}
### only update the timestamp if it's not a symlink; that will change the
### timestamp of the original. This addresses bug #33669: Could not update
### timestamp warning on symlinks
if( not -l $full and not ( $entry->is_hardlink and ON_UNIX and $EXTRACT_HARDLINK ) ) {
utime time, $entry->mtime - TIME_OFFSET, $full or
$self->_error( qq[Could not update timestamp] );
}
if( $CHOWN && CAN_CHOWN->() and not -l $full and not ( $entry->is_hardlink and ON_UNIX and $EXTRACT_HARDLINK ) ) {
CORE::chown( $entry->uid, $entry->gid, $full ) or
$self->_error( qq[Could not set uid/gid on '$full'] );
}
### only chmod if we're allowed to, but never chmod symlinks, since they'll
### change the perms on the file they're linking too...
if( $CHMOD and not -l $full and not ( $entry->is_hardlink and ON_UNIX and $EXTRACT_HARDLINK ) ) {
my $mode = $entry->mode;
unless ($SAME_PERMISSIONS) {
$mode &= ~(oct(7000) | umask);
}
CORE::chmod( $mode, $full ) or
$self->_error( qq[Could not chown '$full' to ] . $entry->mode );
}
return 1;
}
sub _make_special_file {
my $self = shift;
my $entry = shift or return;
my $file = shift; return unless defined $file;
my $err;
if( $entry->is_symlink ) {
if( !$INSECURE_EXTRACT_MODE ) {
my $linkname = $entry->linkname;
if( File::Spec->file_name_is_absolute($linkname) ) {
$self->_error( qq[Symlink '] . $entry->full_path .
qq[' has absolute target. Not extracting under SECURE EXTRACT MODE] );
return;
}
if( !defined _symlinks_resolver( $entry->full_path, $linkname, 1 ) ) {
$self->_error( qq[Symlink '] . $entry->full_path .
qq[' target attempts traversal. Not extracting under SECURE EXTRACT MODE] );
return;
}
}
my $fail;
if( ON_UNIX ) {
symlink( $entry->linkname, $file ) or $fail++;
} else {
$self->_extract_special_file_as_plain_file( $entry, $file )
or $fail++;
}
$err = qq[Making symbolic link '$file' to '] .
$entry->linkname .q[' failed] if $fail;
} elsif ( $entry->is_hardlink ) {
if( !$INSECURE_EXTRACT_MODE ) {
my $linkname = $entry->linkname;
if( File::Spec->file_name_is_absolute($linkname) ) {
$self->_error( qq[Hardlink '] . $entry->full_path .
qq[' has absolute target '$linkname'. Not extracting ] .
qq[under SECURE EXTRACT MODE: extraction itself chmods ] .
qq[the shared inode.] );
return;
}
if( !defined _symlinks_resolver( $entry->full_path, $linkname, 1 ) ) {
lib/Archive/Tar.pm view on Meta::CPAN
$err = qq[Making hard link from '] . $entry->linkname .
qq[' to '$file' failed] if $fail;
} elsif ( $entry->is_fifo ) {
ON_UNIX && !system('mknod', $file, 'p') or
$err = qq[Making fifo ']. $entry->name .qq[' failed];
} elsif ( $entry->is_blockdev or $entry->is_chardev ) {
my $mode = $entry->is_blockdev ? 'b' : 'c';
ON_UNIX && !system('mknod', $file, $mode,
$entry->devmajor, $entry->devminor) or
$err = qq[Making block device ']. $entry->name .qq[' (maj=] .
$entry->devmajor . qq[ min=] . $entry->devminor .
qq[) failed.];
} elsif ( $entry->is_socket ) {
### the original doesn't do anything special for sockets.... ###
1;
}
return $err ? $self->_error( $err ) : 1;
}
### don't know how to make symlinks, let's just extract the file as
### a plain file
sub _extract_special_file_as_plain_file {
my $self = shift;
my $entry = shift or return;
my $file = shift; return unless defined $file;
my $err;
TRY: {
my $orig = $self->_find_entry( $entry->linkname, $entry );
unless( $orig ) {
$err = qq[Could not find file '] . $entry->linkname .
qq[' in memory.];
last TRY;
}
### clone the entry, make it appear as a normal file ###
my $clone = $orig->clone;
$clone->_downgrade_to_plainfile;
$self->_extract_file( $clone, $file ) or last TRY;
return 1;
}
return $self->_error($err);
}
=head2 $tar->list_files( [\@properties] )
Returns a list of the names of all the files in the archive.
If C<list_files()> is passed an array reference as its first argument
it returns a list of hash references containing the requested
properties of each file. The following list of properties is
supported: name, size, mtime (last modified date), mode, uid, gid,
linkname, uname, gname, devmajor, devminor, prefix.
Passing an array reference containing only one element, 'name', is
special cased to return a list of names rather than a list of hash
references, making it equivalent to calling C<list_files> without
arguments.
=cut
sub list_files {
my $self = shift;
my $aref = shift || [ ];
unless( $self->_data ) {
$self->read() or return;
}
if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
return map { $_->full_path } @{$self->_data};
} else {
#my @rv;
#for my $obj ( @{$self->_data} ) {
# push @rv, { map { $_ => $obj->$_() } @$aref };
#}
#return @rv;
### this does the same as the above.. just needs a +{ }
### to make sure perl doesn't confuse it for a block
return map { my $o=$_;
+{ map { $_ => $o->$_() } @$aref }
} @{$self->_data};
}
}
sub _find_entry {
my $self = shift;
my $file = shift;
unless( defined $file ) {
$self->_error( qq[No file specified] );
return;
}
### it's an object already
return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
seach_entry:
if($self->_data){
for my $entry ( @{$self->_data} ) {
my $path = $entry->full_path;
return $entry if $path eq $file;
}
}
if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
$file = _symlinks_resolver( $link_entry->name, $file );
goto seach_entry if $self->_data;
lib/Archive/Tar.pm view on Meta::CPAN
### pad the end of the clone if required ###
print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
}
} ### done writing these entries
}
### write the end markers ###
print $handle TAR_END x 2 or
return $self->_error( qq[Could not write tar end markers] );
### did you want it written to a file, or returned as a string? ###
my $rv = length($file) ? 1
: $HAS_PERLIO ? $dummy
: do { seek $handle, 0, 0; local $/; <$handle> };
### make sure to close the handle if we created it
if ( $file ne $handle ) {
unless( close $handle ) {
$self->_error( qq[Could not write tar] );
return;
}
}
return $rv;
}
sub _format_tar_entry {
my $self = shift;
my $entry = shift or return;
my $ext_prefix = shift; $ext_prefix = '' unless defined $ext_prefix;
my $no_prefix = shift || 0;
my $file = $entry->name;
my $prefix = $entry->prefix; $prefix = '' unless defined $prefix;
### remove the prefix from the file name
### not sure if this is still needed --kane
### no it's not -- Archive::Tar::File->_new_from_file will take care of
### this for us. Even worse, this would break if we tried to add a file
### like x/x.
#if( length $prefix ) {
# $file =~ s/^$match//;
#}
$prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
if length $ext_prefix;
### not sure why this is... ###
my $l = PREFIX_LENGTH; # is ambiguous otherwise...
substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
my $f1 = "%06o"; my $f2 = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
### this might be optimizable with a 'changed' flag in the file objects ###
my $tar = pack (
PACK,
$file,
(map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
(map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
"", # checksum field - space padded a bit down
(map { $entry->$_() } qw[type linkname magic]),
$entry->version || TAR_VERSION,
(map { $entry->$_() } qw[uname gname]),
(map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
($no_prefix ? '' : $prefix)
);
### add the checksum ###
my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
return $tar;
}
=head2 $tar->add_files( @filenamelist )
Takes a list of filenames and adds them to the in-memory archive.
The path to the file is automatically converted to a Unix like
equivalent for use in the archive, and, if on MacOS, the file's
modification time is converted from the MacOS epoch to the Unix epoch.
So tar archives created on MacOS with B<Archive::Tar> can be read
both with I<tar> on Unix and applications like I<suntar> or
I<Stuffit Expander> on MacOS.
Be aware that the file's type/creator and resource fork will be lost,
which is usually what you want in cross-platform archives.
Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
object from, for example, another archive. The object will be clone, and
effectively be a copy of the original, not an alias.
Returns a list of C<Archive::Tar::File> objects that were just added.
=cut
sub add_files {
my $self = shift;
my @files = @_ or return;
my @rv;
for my $file ( @files ) {
### you passed an Archive::Tar::File object
### clone it so we don't accidentally have a reference to
### an object from another archive
if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
push @rv, $file->clone;
next;
}
eval {
if( utf8::is_utf8( $file )) {
utf8::encode( $file );
}
};
unless( -e $file || -l $file ) {
$self->_error( qq[No such file: '$file'] );
next;
}
my $obj = Archive::Tar::File->new( file => $file );
unless( $obj ) {
$self->_error( qq[Unable to add file: '$file'] );
next;
}
push @rv, $obj;
}
push @{$self->{_data}}, @rv;
return @rv;
}
=head2 $tar->add_data ( $filename, $data, [$opthashref] )
Takes a filename, a scalar full of data and optionally a reference to
a hash with specific options.
Will add a file to the in-memory archive, with name C<$filename> and
content C<$data>. Specific properties can be set using C<$opthashref>.
The following list of properties is supported: name, size, mtime
(last modified date), mode, uid, gid, linkname, uname, gname,
devmajor, devminor, prefix, type. (On MacOS, the file's path and
modification times are converted to Unix equivalents.)
Valid values for the file type are the following constants defined by
Archive::Tar::Constant:
=over 4
=item FILE
Regular file.
=item HARDLINK
=item SYMLINK
Hard and symbolic ("soft") links; linkname should specify target.
=item CHARDEV
=item BLOCKDEV
Character and block devices. devmajor and devminor should specify the major
and minor device numbers.
=item DIR
Directory.
=item FIFO
FIFO (named pipe).
=item SOCKET
Socket.
=back
Returns the C<Archive::Tar::File> object that was just added, or
C<undef> on failure.
=cut
sub add_data {
my $self = shift;
my ($file, $data, $opt) = @_;
my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
unless( $obj ) {
$self->_error( qq[Unable to add file: '$file'] );
return;
}
push @{$self->{_data}}, $obj;
return $obj;
}
=head2 $tar->error( [$BOOL] )
lib/Archive/Tar.pm view on Meta::CPAN
=cut
sub iter {
my $class = shift;
my $filename = shift;
return unless defined $filename;
my $compressed = shift || 0;
my $opts = shift || {};
### get a handle to read from.
my $handle = $class->_get_handle(
$filename,
$compressed,
READ_ONLY->( ZLIB )
) or return;
my @data;
my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
return sub {
return shift(@data) if @data; # more than one file returned?
return unless $handle; # handle exhausted?
### read data, should only return file
my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
@data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
foreach(@data){
#may refine this heuristic for ON_UNIX?
if($_->linkname){
#is there a better slot to store/share it ?
$_->{'_archive'} = $CONSTRUCT_ARGS;
}
}
}
### return one piece of data
return shift(@data) if @data;
### data is exhausted, free the filehandle
undef $handle;
if(@$CONSTRUCT_ARGS == 4){
#free archive in memory
undef $CONSTRUCT_ARGS->[-1];
}
return;
};
}
=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
Returns a list of the names of all the files in the archive. The
first argument can either be the name of the tar file to list or a
reference to an open file handle (e.g. a GLOB reference).
If C<list_archive()> is passed an array reference as its third
argument it returns a list of hash references containing the requested
properties of each file. The following list of properties is
supported: full_path, name, size, mtime (last modified date), mode,
uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
See C<Archive::Tar::File> for details about supported properties.
Passing an array reference containing only one element, 'name', is
special cased to return a list of names rather than a list of hash
references.
=cut
sub list_archive {
my $class = shift;
my $file = shift; return unless defined $file;
my $gzip = shift || 0;
my $tar = $class->new($file, $gzip);
return unless $tar;
return $tar->list_files( @_ );
}
=head2 Archive::Tar->extract_archive($file, $compressed)
Extracts the contents of the tar file. The first argument can either
be the name of the tar file to create or a reference to an open file
handle (e.g. a GLOB reference). All relative paths in the tar file will
be created underneath the current working directory.
C<extract_archive> will return a list of files it extracted.
If the archive extraction fails for any reason, C<extract_archive>
will return false. Please use the C<error> method to find the cause
of the failure.
=cut
sub extract_archive {
my $class = shift;
my $file = shift; return unless defined $file;
my $gzip = shift || 0;
my $tar = $class->new( ) or return;
return $tar->read( $file, $gzip, { extract => 1 } );
}
=head2 $bool = Archive::Tar->has_io_string
Returns true if we currently have C<IO::String> support loaded.
Either C<IO::String> or C<perlio> support is needed to support writing
stringified archives. Currently, C<perlio> is the preferred method, if
available.
See the C<GLOBAL VARIABLES> section to see how to change this preference.
=cut
sub has_io_string { return $HAS_IO_STRING; }
=head2 $bool = Archive::Tar->has_perlio
( run in 2.424 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )