Apache2-SSI
view release on metacpan or search on metacpan
lib/Apache2/SSI/File/Type.pm view on Meta::CPAN
##----------------------------------------------------------------------------
## Apache2 Server Side Include Parser - ~/lib/Apache2/SSI/File/Type.pm
## Version v0.1.3
## Copyright(c) 2024 DEGUEST Pte. Ltd.
## Author: Jacques Deguest <jack@deguest.jp>
## Created 2021/03/27
## Modified 2025/03/22
## All rights reserved
##
## This program is free software; you can redistribute it and/or modify it
## under the same terms as Perl itself.
##----------------------------------------------------------------------------
package Apache2::SSI::File::Type;
BEGIN
{
use strict;
use warnings;
use warnings::register;
use parent qw( Module::Generic );
use vars qw( $VERSION $TEMPLATES $ESC $MAGIC_DATA $MAGIC_DATA_SOURCE );
use Digest::MD5;
use File::Basename ();
use File::Spec ();
use IO::File;
use JSON;
use Scalar::Util ();
use URI::file;
our $VERSION = 'v0.1.3';
# Translation of type in magic file to unpack template and byte count
our $TEMPLATES =
{
'byte' => [ 'c', 1 ],
'ubyte' => [ 'C', 1 ],
'char' => [ 'c', 1 ],
'uchar' => [ 'C', 1 ],
'short' => [ 's', 2 ],
'ushort' => [ 'S', 2 ],
'long' => [ 'l', 4 ],
'ulong' => [ 'L', 4 ],
'date' => [ 'l', 4 ],
'ubeshort' => [ 'n', 2 ],
'beshort' => [ [ 'n', 'S', 's' ], 2 ],
'ubelong' => [ 'N', 4 ],
'belong' => [ [ 'N', 'I', 'i' ], 4 ],
'bedate' => [ 'N', 4 ],
'uleshort' => [ 'v', 2 ],
'leshort' => [ [ 'v', 'S', 's' ], 2 ],
'ulelong' => [ 'V', 4 ],
'lelong' => [ [ 'V', 'I', 'i' ], 4 ],
'ledate' => [ 'V', 4 ],
'string' => undef(),
};
# For letter escapes in magic file
our $ESC =
{
'n' => "\n",
'r' => "\r",
'b' => "\b",
't' => "\t",
'f' => "\f"
};
# Cache
our $MAGIC_DATA = [];
# Keep a record of the source data file, if any, so we can re-use this cached data instead of re-reading from it
our $MAGIC_DATA_SOURCE = '';
};
use strict;
use warnings;
sub init
{
my $self = shift( @_ );
my $file;
$file = shift( @_ ) if( @_ % 2 );
my $opts = $self->_get_args_as_hash( @_ );
$opts->{magic} = $file if( length( $file ) );
$self->{follow_links} = 1;
$self->{check_magic} = 0;
# If there is an error or file is empty, it returns undef instead of application/octet-stream
$self->{error_returns_undef} = 0;
# Default to returns text/plain. If not, it will return an empty string and leave the caller to set the default mime-type.
$self->{default_type} = 'text/plain';
$self->{_init_strict_use_sub} = 1;
$self->SUPER::init( @_ );
$self->{magic} = {};
$self->{magic_data} = [];
my $load_json_data = sub
{
my $json_file = shift( @_ ) || return;
my $io = IO::File->new( "<$json_file" ) ||
return( $self->error( "Unable to open our own json magic file \"$json_file\": $!" ) );
local $/;
my $buf = scalar( <$io> );
$io->close;
local $@;
# try-catch
my $rv = eval
{
my $j = JSON->new->relaxed->allow_nonref;
$MAGIC_DATA = $self->{magic_data} = $j->decode( $buf );
return(1);
};
if( $@ )
{
return( $self->error( "An error occured while trying to json decode ", length( $buf ), " bytes of json data: $@" ) );
}
return( $rv );
};
if( $opts->{magic} )
{
$file = $opts->{magic};
lib/Apache2/SSI/File/Type.pm view on Meta::CPAN
}
# Read another entry from the magic file if we've exhausted all the entries
# already buffered. read_magic_entry will add to the end of the array
# if there are more.
if( $m == $#{ $self->{magic_data} } &&
$self->{magic}->{io} &&
!$self->{magic}->{io}->eof )
{
$self->read_magic_entry();
}
}
# 4) Check if it's text or binary.
# if It's text, then do a bunch of searching for special tokens
if( !$match_found )
{
my $data = '';
$io->seek( 0, 0 );
$io->read( $data, 0x8564 );
$type = $self->with_data( $data );
}
if( !defined( $type ) )
{
$type = $opts->{default} ? $opts->{default} : '';
}
return( $type );
}
sub parse_magic_file
{
my $self = shift( @_ );
my $io = shift( @_ );
# Initialize values
$self->{magic}->{io} = $io;
$self->{magic}->{buffer} = undef();
$self->{magic}->{count} = 0;
while( !$io->eof() )
{
$self->read_magic_entry();
}
seek( $io, 0, 0 );
}
# parse_magic_line( $line, $line_num, $subtests )
#
# Parses the match info out of $line. Returns a reference to an array.
#
# Format is:
#
# [ offset, bytes, type, mask, operator, testval, template, sprintf, subtests ]
# 0 1 2 3 4 5 6 7 8
#
# subtests is an array like @$data.
sub parse_magic_line
{
my $self = shift( @_ );
my( $line, $line_num, $subtests ) = @_;
my( $offtype, $offset, $numbytes, $type, $mask, $operator, $testval, $template, $message );
# This would be easier if escaped whitespace wasn't allowed.
# Grab the offset and type. offset can either be a decimal, oct, or hex offset or
# an indirect offset specified in parenthesis like (x[.[bsl]][+-][y]), or a relative
# offset specified by &. offtype : 0 = absolute, 1 = indirect, 2 = relative
if( $line =~ s/^>*([&\(]?[a-flsx\.\+\-\d]+\)?)[[:blank:]\h]+(\S+)[[:blank:]\h]+// )
{
( $offset, $type ) = ( $1, $2 );
if( $offset =~ /^\(/ )
{
# Indirect offset.
$offtype = 1;
# Store as a reference [ offset1 type template offset2 ]
my( $o1, $type, $o2 );
if( ( $o1, $type, $o2 ) = ( $offset =~ /\((\d+)(\.[bsl])?([\+\-]?\d+)?\)/ ) )
{
$o1 = oct( $o1 ) if( $o1 =~ /^0/o );
$o2 = oct( $o2 ) if( $o2 =~ /^0/o );
$type =~ s/\.//;
# Default to long
$type = 'l' if( $type eq '' );
# Type will be template for unpack
$type =~ tr/b/c/;
# Number of bytes
my $sz = $type;
$sz =~ tr/csl/124/;
$offset = [ $o1, $sz, $type, int( $o2 ) ];
}
else
{
return( $self->error( "Bad indirect offset at line $line_num. '$offset'" ) );
}
}
elsif( $offset =~ /^&/o )
{
# Relative offset
$offtype = 2;
$offset = substr( $offset, 1 );
$offset = oct( $offset ) if( $offset =~ /^0/o );
}
else
{
# Mormal absolute offset
$offtype = 0;
# Convert if needed
$offset = oct( $offset ) if( $offset =~ /^0/o );
}
}
else
{
return( $self->error( "Bad Offset/Type at line $line_num. '$line'" ) );
}
# Check for & operator on type
if( $type =~ s/&(.*)// )
{
$mask = $1;
# Convert if needed
$mask = oct( $mask ) if( $mask =~ /^0/o );
}
# Check if type is valid
if( !exists( $TEMPLATES->{ $type } ) )
{
return( $self->error( "Invalid type '$type' at line $line_num" ) );
}
# Take everything after the first non-escaped space
if( $line =~ s/([^\\])\s+(.*)/$1/ )
{
$message = $2;
}
else
{
return( $self->error( "Missing or invalid test condition/message at line $line_num" ) );
}
# Remove the return if it is still there
$line =~ s/\n$//o;
# Get the operator. If 'x', must be alone. Default is '='.
if( $line =~ s/^([><&^=!])//o )
{
$operator = $1;
}
elsif( $line eq 'x' )
{
$operator = 'x';
}
else
{
$operator = '=';
}
if( $type eq 'string' )
{
$testval = $line;
# Do octal/hex conversion
$testval =~ s/\\([x0-7][0-7]?[0-7]?)/chr( oct( $1 ) )/eg;
# Do single char escapes
$testval =~ s/\\(.)/$ESC->{ $1 }||$1/eg;
# Put the number of bytes to read in numbytes.
# '0' means read to \0 or \n.
if( $operator =~ /[>x]/o )
{
$numbytes = 0;
}
elsif( $operator =~ /[=<]/o )
{
$numbytes = length( $testval );
}
elsif( $operator eq '!' )
{
# Annoying special case. ! operator only applies to numerics so put it back.
$testval = $operator . $testval;
$numbytes = length( $testval );
$operator = '=';
}
else
{
# There's a bug in my magic file where there's a line that says
# "0 string ^!<arc..." and the BSD file program treats the argument
# like a numeric. To minimize hassles, complain about bad ops only if -c is set.
return( $self->error( "Invalid operator '$operator' for type 'string' at line $line_num." ) );
}
}
else
{
# Numeric
if( $operator ne 'x' )
{
# This conversion is very forgiving. Tt's faster and it doesn't complain
# about bugs in popular magic files, but it will silently turn a string into zero.
if( $line =~ /^0/o )
{
$testval = oct( $line );
}
else
{
$testval = int( $line );
}
}
( $template, $numbytes ) = @{$TEMPLATES->{ $type }};
# Unset coercion of $unsigned unless we're doing order comparison
if( ref( $template ) )
{
$template = $template->[0] unless( $operator eq '>' || $operator eq '<' );
}
}
return( [ $offtype, $offset, $numbytes, $type, $mask, $operator, $testval, $template, $message, $subtests ] );
}
# read_magic_entry( $magic_data, $depth )
#
# Reads the next entry from the magic file and stores it as a ref to an array at the
# end of @$magic_data.
#
( run in 1.870 second using v1.01-cache-2.11-cpan-39bf76dae61 )