Apache2-SSI
view release on metacpan or search on metacpan
lib/Apache2/SSI/File/Type.pm view on Meta::CPAN
my $self = shift( @_ );
my $file;
$file = shift( @_ ) if( @_ % 2 );
my $opts = $self->_get_args_as_hash( @_ );
$opts->{magic} = $file if( length( $file ) );
$self->{follow_links} = 1;
$self->{check_magic} = 0;
# If there is an error or file is empty, it returns undef instead of application/octet-stream
$self->{error_returns_undef} = 0;
# Default to returns text/plain. If not, it will return an empty string and leave the caller to set the default mime-type.
$self->{default_type} = 'text/plain';
$self->{_init_strict_use_sub} = 1;
$self->SUPER::init( @_ );
$self->{magic} = {};
$self->{magic_data} = [];
my $load_json_data = sub
{
my $json_file = shift( @_ ) || return;
my $io = IO::File->new( "<$json_file" ) ||
return( $self->error( "Unable to open our own json magic file \"$json_file\": $!" ) );
local $/;
my $buf = scalar( <$io> );
$io->close;
local $@;
# try-catch
my $rv = eval
{
my $j = JSON->new->relaxed->allow_nonref;
$MAGIC_DATA = $self->{magic_data} = $j->decode( $buf );
return(1);
};
if( $@ )
{
return( $self->error( "An error occured while trying to json decode ", length( $buf ), " bytes of json data: $@" ) );
}
return( $rv );
};
if( $opts->{magic} )
{
$file = $opts->{magic};
my $file_abs = URI::file->new_abs( $file )->file( $^O );
if( $file_abs eq $MAGIC_DATA_SOURCE && scalar( @$MAGIC_DATA ) )
{
$self->{magic_data} = $MAGIC_DATA;
}
else
{
my $checksum = Digest::MD5::md5_hex( $file_abs );
my $base = File::Basename::basename( $file );
my $path = File::Spec->catpath( File::Spec->tmpdir, $base . "_${checksum}.json" );
if( -e( $path ) && -s( $path ) )
{
$load_json_data->( $path ) || return;
}
else
{
return( $self->error( "Magic file provided \"$file\" does not exist." ) ) if( !-e( $file ) );
my $io = IO::File->new( "<$file" ) ||
return( $self->error( "Unable to open magic file provided \"$file\": $!" ) );
$io->binmode;
$self->parse_magic_file( $io );
$MAGIC_DATA = $self->{magic_data};
$io->close;
my $json = $self->as_json || return;
my $fh = IO::File->new( ">$path" ) ||
return( $self->error( "Unable to write to magic cache json data file \"$path\": $!" ) );
$fh->binmode;
$fh->print( $json );
$fh->close;
}
$MAGIC_DATA_SOURCE = $file_abs;
}
}
elsif( $MAGIC_DATA && scalar( @$MAGIC_DATA ) )
{
$self->{magic_data} = $MAGIC_DATA;
}
else
{
$file = __FILE__;
$file =~ s/\.pm/\.json/;
return( $self->error( "Apache2::SSI magic file \"$file\" does not exist." ) ) if( !-e( $file ) );
$load_json_data->( $file ) || return;
}
# From the BSD names.h, some tokens for hard-coded checks of different texts.
# This isn't rocket science. It's prone to failure so these checks are only a last resort.
$self->{SPECIALS} =
{
'message/rfc822' =>
[
'^Received:',
'^>From ',
'^From ',
'^To: ',
'^Return-Path: ',
'^Cc: ',
'^X-Mailer: '
],
'message/news' =>
[
'^Newsgroups: ',
'^Path: ',
'^X-Newsreader: '
],
'text/html' =>
[
'<html[^>]*>',
'<HTML[^>]*>',
'<head[^>]*>',
'<HEAD[^>]*>',
'<body[^>]*>',
'<BODY[^>]*>',
'<title[^>]*>',
'<TITLE[^>]*>',
'<h1[^>]*>',
'<H1[^>]*>',
],
'text/x-roff' =>
[
"^\\.SH",
"^\\.PP",
"^\\.TH",
"^\\.BR",
"^\\.SS",
"^\\.TP",
"^\\.IR",
],
};
$self->{FILE_EXTS} =
{
qr/\.gz$/ => 'application/x-gzip',
qr/\.bz2$/ => 'application/x-bzip2',
qr/\.Z$/ => 'application/x-compress',
qr/\.txt$/ => 'text/plain',
qr/\.html$/ => 'text/html',
qr/\.htm$/ => 'text/html',
};
return( $self );
}
sub as_json
{
my $self = shift( @_ );
my $data = $self->{magic_data};
my $j = JSON->new->relaxed->allow_nonref;
my $json = $j->pretty->encode( $data );
return( $json );
}
sub check
{
my $self = shift( @_ );
my $file = shift( @_ );
my $prev = $self->check_magic;
$self->check_magic( 1 );
my $io = IO::File->new( "<$file" ) || return( $self->error( "Unable to open magic file \"$file\": $!" ) );
$io->binmode;
$self->{magic}->{io} = $io;
my $data = [];
while( !$io->eof() )
{
$self->read_magic_entry( $data );
}
$io->close();
$self->dump( $data );
$self->check_magic( $prev );
return( $self );
}
sub check_magic { return( shift->_set_get_boolean( 'check_magic', @_ ) ); }
sub data
{
my $self = shift( @_ );
my $data = shift( @_ );
my $type = '';
if( length( $data ) <= 0 )
{
return( $self->{default_type} ? 'application/octet-stream' : '' );
}
$type = $self->with_magic( $data );
# 4) Check if it's text or binary.
# If it's text, then do a bunch of searching for special tokens
if( !defined( $type ) )
{
$type = $self->with_data( $data );
}
if( !defined( $type ) )
{
$type = $self->{default_type} ? 'text/plain' : '';
}
return( $type );
}
sub default_type { return( shift->_set_get_scalar( 'default_type', @_ ) ); }
# Recursively write the magic file to stderr.
# Numbers are written in decimal.
sub dump
{
my $self = shift( @_ );
my $data = shift( @_ ) || $self->{magic_data};
my $depth = shift( @_ );
$data = [] unless( defined( $data ) );
$depth = 0 unless( defined( $depth ) );
our $err = IO::File->new;
$err->autoflush( 1 );
$err->fdopen( fileno( STDERR ), 'w' ) || return( $self->error( "Cannot write to STDERR: $!" ) );
$err->binmode;
foreach my $entry ( @$data )
{
# Delayed evaluation.
$entry = $self->parse_magic_line( @$entry ) if( scalar( @$entry ) == 3 );
next if( !defined( $entry ) );
my( $offtype, $offset, $numbytes, $type, $mask, $op, $testval, $template, $message, $subtests ) = @$entry;
$err->print( '>' x $depth );
if( $offtype == 1 )
{
$offset->[2] =~ tr/c/b/;
$err->printf( "(%s.%s%s)", $offset->[0], $offset->[2], $offset->[3] );
}
elsif( $offtype == 2 )
{
$err->print( "&", $offset );
}
else
{
# offtype == 0
$err->print( $offset );
}
$err->print( "\t", $type );
if( $mask )
{
$err->print( "&", $mask );
}
$err->print( "\t", $op, $testval, "\t", $message, "\n" );
if( $subtests )
{
$self->dump( $subtests, $depth + 1 );
}
}
}
sub error_returns_undef { return( shift->_set_get_boolean( 'error_returns_undef', @_ ) ); }
sub file
{
my $self = shift( @_ );
# The description line. append info to this string
my $desc = '';
my $type = '';
# Iterate over each file explicitly so we can seek
my $file = shift( @_ ) || do
{
if( $self->{error_returns_undef} )
{
return( $self->error( "Missing file arguement. Usage: \$magic->file( \$some_file_name )" ) );
}
else
{
$desc .= "no file provided.";
return( "x-system/x-error; $desc" );
}
};
# No need to let everybody know what is our server file system
my $base_file = File::Basename::basename( $file );
lib/Apache2/SSI/File/Type.pm view on Meta::CPAN
}
# 1) Check permission
elsif( !-r( $file ) )
{
if( $self->{error_returns_undef} )
{
return( $self->error( "Unable to read file '$file'; lacking permission" ) );
}
else
{
$desc .= "unable to read '$base_file': Permission denied.";
return( "x-system/x-error; $desc" );
}
}
# 2) Check for various special files first
if( $self->follow_links )
{
CORE::stat( $file );
}
else
{
CORE::lstat( $file );
}
# Avoid doing many useless redondant system stat, use '_'. See perlfunc man page
if( !-f( _ ) || -z( _ ) )
{
if( !$self->follow_links && -l( _ ) )
{
#$desc .= " symbolic link to ". readlink( $file );
return( 'application/x-link' );
}
elsif( -d( _ ) ) { return( 'application/x-directory' ); }
# Named pipe
elsif( -p( _ ) ) { return( 'application/x-pipe' ); }
elsif( -S( _ ) ) { return( 'application/x-socket' ); }
# Block special file
elsif( -b( _ ) ) { return( 'application/x-block' ); }
# Character special file
elsif( -c( _ ) ) { return( 'application/x-character' ); }
elsif( -z( _ ) ) { return( 'application/x-empty' ); }
else
{
return( $self->{default_type} ? $self->{default_type} : 'application/x-unknown' );
}
}
# Current file handle. or undef if check_magic (-c option) is true.
my $io;
$io = IO::File->new( "<$file" ) || do
{
if( $self->{error_returns_undef} )
{
return( $self->error( "Unable to open file '$file': $!" ) );
}
else
{
return( "x-system/x-error; $base_file: $!" );
}
};
$io->binmode;
# 3) Check for script
# if( ( -x( $file ) || ( $^O =~ /^(dos|mswin32|NetWare|symbian|win32)$/i && $file =~ /\.(?:pl|cgi)$/ ) ) &&
# if( ( -x( $file ) || $file =~ /\.(?:cgi|pl|t)$/ ) &&
# -T( _ ) )
my $default;
if( -x( $file ) && -T( _ ) )
{
# Note, some magic files include elaborate attempts to match #! header lines
# and return pretty responses but this slows down matching and is unnecessary.
my $line1 = $io->getline;
if( $line1 =~ /^\#![[:blank:]\h]*(\S+)/ )
{
# Returns the binary name, without file path
my $bin_name = File::Basename::basename( $1 );
#$desc .= " executable $bin_name script text";
# $io->close;
# return( "text/x-${bin_name}" );
$default = "text/x-${bin_name}";
}
}
my $out = $self->handle( $io, $desc, { default => $default } );
$io->close;
return( $out );
}
sub follow_links { return( shift->_set_get_boolean( 'follow_links', @_ ) ); }
sub handle
{
my $self = shift( @_ );
my $io = shift( @_ );
my $desc = shift( @_ );
my $opts = $self->_get_args_as_hash( @_ );
$opts->{default} = $self->default_type if( !length( $opts->{default} ) );
my $type = '';
# 3) Iterate over each magic entry.
my $match_found = 0;
for( my $m = 0; $m <= $#{ $self->{magic_data} }; $m++ )
{
# Check if the m-th magic entry matches and if it does, then $desc will contain
# an updated description
my $test;
if( ( $test = $self->_magic_match( $self->{magic_data}->[$m], \$desc, $io ) ) )
{
if( defined( $desc ) && $desc ne '' )
{
$match_found = 1;
$type = $desc;
last;
}
}
elsif( !defined( $test ) )
{
warnings::warn( "Error occurred while checking for match: ", $self->error ) if( warnings::enabled() && $self->debug );
}
# Read another entry from the magic file if we've exhausted all the entries
# already buffered. read_magic_entry will add to the end of the array
( run in 1.996 second using v1.01-cache-2.11-cpan-39bf76dae61 )