Apache2-SSI

 view release on metacpan or  search on metacpan

lib/Apache2/SSI/File/Type.pm  view on Meta::CPAN

##----------------------------------------------------------------------------
## Apache2 Server Side Include Parser - ~/lib/Apache2/SSI/File/Type.pm
## Version v0.1.3
## Copyright(c) 2024 DEGUEST Pte. Ltd.
## Author: Jacques Deguest <jack@deguest.jp>
## Created 2021/03/27
## Modified 2025/03/22
## All rights reserved
## 
## This program is free software; you can redistribute  it  and/or  modify  it
## under the same terms as Perl itself.
##----------------------------------------------------------------------------
package Apache2::SSI::File::Type;
BEGIN
{
    use strict;
    use warnings;
    use warnings::register;
    use parent qw( Module::Generic );
    use vars qw( $VERSION $TEMPLATES $ESC $MAGIC_DATA $MAGIC_DATA_SOURCE );
    use Digest::MD5;
    use File::Basename ();
    use File::Spec ();
    use IO::File;
    use JSON;
    use Scalar::Util ();
    use URI::file;
    our $VERSION = 'v0.1.3';
    # Translation of type in magic file to unpack template and byte count
    our $TEMPLATES = 
    {
        'byte'      => [ 'c', 1 ],
        'ubyte'     => [ 'C', 1 ],
        'char'      => [ 'c', 1 ],
        'uchar'     => [ 'C', 1 ],
        'short'     => [ 's', 2 ],
        'ushort'    => [ 'S', 2 ],
        'long'      => [ 'l', 4 ],
        'ulong'     => [ 'L', 4 ],
        'date'      => [ 'l', 4 ],
        'ubeshort'  => [ 'n', 2 ],
        'beshort'   => [ [ 'n', 'S', 's' ], 2 ],
        'ubelong'   => [   'N',             4 ],
        'belong'    => [ [ 'N', 'I', 'i' ], 4 ],
        'bedate'    => [   'N',             4 ],
        'uleshort'  => [   'v',             2 ],
        'leshort'   => [ [ 'v', 'S', 's' ], 2 ],
        'ulelong'   => [   'V',             4 ],
        'lelong'    => [ [ 'V', 'I', 'i' ], 4 ],
        'ledate'    => [   'V',             4 ],
        'string'    => undef(),
    };
    
    # For letter escapes in magic file
    our $ESC = 
    {
        'n' => "\n",
        'r' => "\r",
        'b' => "\b",
        't' => "\t",
        'f' => "\f"
    };
    # Cache
    our $MAGIC_DATA = [];
    # Keep a record of the source data file, if any, so we can re-use this cached data instead of re-reading from it
    our $MAGIC_DATA_SOURCE = '';
};

use strict;
use warnings;

sub init
{
    my $self = shift( @_ );
    my $file;
    $file = shift( @_ ) if( @_ % 2 );
    my $opts = $self->_get_args_as_hash( @_ );
    $opts->{magic} = $file if( length( $file ) );
    $self->{follow_links} = 1;
    $self->{check_magic}  = 0;
    # If there is an error or file is empty, it returns undef instead of application/octet-stream
    $self->{error_returns_undef} = 0;
    # Default to returns text/plain. If not, it will return an empty string and leave the caller to set the default mime-type.
    $self->{default_type} = 'text/plain';
    $self->{_init_strict_use_sub} = 1;
    $self->SUPER::init( @_ );
    $self->{magic}        = {};
    $self->{magic_data}   = [];
    my $load_json_data = sub
    {
        my $json_file = shift( @_ ) || return;
        my $io = IO::File->new( "<$json_file" ) ||
            return( $self->error( "Unable to open our own json magic file \"$json_file\": $!" ) );
        local $/;
        my $buf = scalar( <$io> );
        $io->close;
        local $@;
        # try-catch
        my $rv = eval
        {
            my $j = JSON->new->relaxed->allow_nonref;
            $MAGIC_DATA = $self->{magic_data} = $j->decode( $buf );
            return(1);
        };
        if( $@ )
        {
            return( $self->error( "An error occured while trying to json decode ", length( $buf ), " bytes of json data: $@" ) );
        }
        return( $rv );
    };
    
    if( $opts->{magic} )
    {
        $file = $opts->{magic};

lib/Apache2/SSI/File/Type.pm  view on Meta::CPAN

        }
    
        # Read another entry from the magic file if we've exhausted all the entries 
        # already buffered. read_magic_entry will add to the end of the array 
        # if there are more.
        if( $m == $#{ $self->{magic_data} } &&
            $self->{magic}->{io} && 
            !$self->{magic}->{io}->eof )
        {
            $self->read_magic_entry();
        }
    }
    
    # 4) Check if it's text or binary.
    # if It's text, then do a bunch of searching for special tokens
    if( !$match_found ) 
    {
        my $data = '';
        $io->seek( 0, 0 );
        $io->read( $data, 0x8564 );
        $type = $self->with_data( $data );
    }
    if( !defined( $type ) )
    {
        $type = $opts->{default} ? $opts->{default} : '';
    }
    return( $type );
}

sub parse_magic_file 
{
    my $self = shift( @_ );
    my $io   = shift( @_ );
    # Initialize values
    $self->{magic}->{io}     = $io;
    $self->{magic}->{buffer} = undef();
    $self->{magic}->{count}  = 0;
    while( !$io->eof() )
    {
        $self->read_magic_entry();
    }
    seek( $io, 0, 0 );
}

# parse_magic_line( $line, $line_num, $subtests )
#
# Parses the match info out of $line.  Returns a reference to an array.
#
#  Format is:
#
# [ offset, bytes, type, mask, operator, testval, template, sprintf, subtests ]
#     0      1      2       3        4         5        6        7      8
#
# subtests is an array like @$data.
sub parse_magic_line 
{
    my $self = shift( @_ );
    my( $line, $line_num, $subtests ) = @_;
    my( $offtype, $offset, $numbytes, $type, $mask, $operator, $testval, $template, $message );
    
    # This would be easier if escaped whitespace wasn't allowed.
    
    # Grab the offset and type.  offset can either be a decimal, oct, or hex offset or 
    # an indirect offset specified in parenthesis like (x[.[bsl]][+-][y]), or a relative 
    # offset specified by &. offtype : 0 = absolute, 1 = indirect, 2 = relative
    if( $line =~ s/^>*([&\(]?[a-flsx\.\+\-\d]+\)?)[[:blank:]\h]+(\S+)[[:blank:]\h]+// ) 
    {
        ( $offset, $type ) = ( $1, $2 );
        if( $offset =~ /^\(/ ) 
        {
            # Indirect offset.
            $offtype = 1;
            # Store as a reference [ offset1 type template offset2 ]
            my( $o1, $type, $o2 );
            if( ( $o1, $type, $o2 ) = ( $offset =~ /\((\d+)(\.[bsl])?([\+\-]?\d+)?\)/ ) )
            {
                $o1 = oct( $o1 ) if( $o1 =~ /^0/o );
                $o2 = oct( $o2 ) if( $o2 =~ /^0/o );
        
                $type =~ s/\.//;
                # Default to long
                $type = 'l' if( $type eq '' );
                # Type will be template for unpack
                $type =~ tr/b/c/;
                # Number of bytes
                my $sz = $type;
                $sz =~ tr/csl/124/;
        
                $offset = [ $o1, $sz, $type, int( $o2 ) ];
            } 
            else 
            {
                return( $self->error( "Bad indirect offset at line $line_num. '$offset'" ) );
            }
        }
        elsif( $offset =~ /^&/o ) 
        {
            # Relative offset
            $offtype = 2;
        
            $offset = substr( $offset, 1 );
            $offset = oct( $offset ) if( $offset =~ /^0/o );
        }
        else 
        {
            # Mormal absolute offset
            $offtype = 0;
        
            # Convert if needed
            $offset = oct( $offset ) if( $offset =~ /^0/o );
        }
    }
    else 
    {
        return( $self->error( "Bad Offset/Type at line $line_num. '$line'" ) );
    }
    
    # Check for & operator on type
    if( $type =~ s/&(.*)// ) 
    {
        $mask = $1;
        # Convert if needed
        $mask = oct( $mask ) if( $mask =~ /^0/o );
    }
    
    # Check if type is valid
    if( !exists( $TEMPLATES->{ $type } ) ) 
    {
        return( $self->error( "Invalid type '$type' at line $line_num" ) );
    }
    
    # Take everything after the first non-escaped space
    if( $line =~ s/([^\\])\s+(.*)/$1/ ) 
    {
        $message = $2;
    }
    else 
    {
        return( $self->error( "Missing or invalid test condition/message at line $line_num" ) );
    }
    
    # Remove the return if it is still there
    $line =~ s/\n$//o;

    # Get the operator. If 'x', must be alone. Default is '='.
    if( $line =~ s/^([><&^=!])//o ) 
    {
        $operator = $1;
    }
    elsif( $line eq 'x' ) 
    {
        $operator = 'x';
    }
    else
    {
        $operator = '=';
    }
    
    if( $type eq 'string' ) 
    {
        $testval = $line;
    
        # Do octal/hex conversion
        $testval =~ s/\\([x0-7][0-7]?[0-7]?)/chr( oct( $1 ) )/eg;
    
        # Do single char escapes
        $testval =~ s/\\(.)/$ESC->{ $1 }||$1/eg;
    
        # Put the number of bytes to read in numbytes.
        # '0' means read to \0 or \n.
        if( $operator =~ /[>x]/o ) 
        {
            $numbytes = 0;
        }
        elsif( $operator =~ /[=<]/o ) 
        {
            $numbytes = length( $testval );
        }
        elsif( $operator eq '!' )
        {
            # Annoying special case. ! operator only applies to numerics so put it back.
            $testval  = $operator . $testval;
            $numbytes = length( $testval );
            $operator = '=';
        }
        else 
        {
            # There's a bug in my magic file where there's a line that says
            # "0    string    ^!<arc..." and the BSD file program treats the argument 
            # like a numeric. To minimize hassles, complain about bad ops only if -c is set.
            return( $self->error( "Invalid operator '$operator' for type 'string' at line $line_num." ) );
        }
    }
    else 
    {
        # Numeric
        if( $operator ne 'x' ) 
        {
            # This conversion is very forgiving. Tt's faster and it doesn't complain 
            # about bugs in popular magic files, but it will silently turn a string into zero.
            if( $line =~ /^0/o ) 
            {
                $testval = oct( $line );
            } 
            else 
            {
                $testval = int( $line );
            }
        }
    
        ( $template, $numbytes ) = @{$TEMPLATES->{ $type }};
    
        # Unset coercion of $unsigned unless we're doing order comparison
        if( ref( $template ) ) 
        {
            $template = $template->[0] unless( $operator eq '>' || $operator eq '<' );
        }
    }
    return( [ $offtype, $offset, $numbytes, $type, $mask, $operator, $testval, $template, $message, $subtests ] );
}

# read_magic_entry( $magic_data, $depth )
#
# Reads the next entry from the magic file and stores it as a ref to an array at the 
# end of @$magic_data.
#



( run in 1.870 second using v1.01-cache-2.11-cpan-39bf76dae61 )