Apache2-SSI

 view release on metacpan or  search on metacpan

lib/Apache2/SSI/File/Type.pm  view on Meta::CPAN

    my $self = shift( @_ );
    my $file;
    $file = shift( @_ ) if( @_ % 2 );
    my $opts = $self->_get_args_as_hash( @_ );
    $opts->{magic} = $file if( length( $file ) );
    $self->{follow_links} = 1;
    $self->{check_magic}  = 0;
    # If there is an error or file is empty, it returns undef instead of application/octet-stream
    $self->{error_returns_undef} = 0;
    # Default to returns text/plain. If not, it will return an empty string and leave the caller to set the default mime-type.
    $self->{default_type} = 'text/plain';
    $self->{_init_strict_use_sub} = 1;
    $self->SUPER::init( @_ );
    $self->{magic}        = {};
    $self->{magic_data}   = [];
    my $load_json_data = sub
    {
        my $json_file = shift( @_ ) || return;
        my $io = IO::File->new( "<$json_file" ) ||
            return( $self->error( "Unable to open our own json magic file \"$json_file\": $!" ) );
        local $/;
        my $buf = scalar( <$io> );
        $io->close;
        local $@;
        # try-catch
        my $rv = eval
        {
            my $j = JSON->new->relaxed->allow_nonref;
            $MAGIC_DATA = $self->{magic_data} = $j->decode( $buf );
            return(1);
        };
        if( $@ )
        {
            return( $self->error( "An error occured while trying to json decode ", length( $buf ), " bytes of json data: $@" ) );
        }
        return( $rv );
    };
    
    if( $opts->{magic} )
    {
        $file = $opts->{magic};
        my $file_abs = URI::file->new_abs( $file )->file( $^O );
        if( $file_abs eq $MAGIC_DATA_SOURCE && scalar( @$MAGIC_DATA ) )
        {
            $self->{magic_data} = $MAGIC_DATA;
        }
        else
        {
            my $checksum = Digest::MD5::md5_hex( $file_abs );
            my $base = File::Basename::basename( $file );
            my $path = File::Spec->catpath( File::Spec->tmpdir, $base . "_${checksum}.json" );
            if( -e( $path ) && -s( $path ) )
            {
                $load_json_data->( $path ) || return;
            }
            else
            {
                return( $self->error( "Magic file provided \"$file\" does not exist." ) ) if( !-e( $file ) );
                my $io = IO::File->new( "<$file" ) ||
                    return( $self->error( "Unable to open magic file provided \"$file\": $!" ) );
                $io->binmode;
                $self->parse_magic_file( $io );
                $MAGIC_DATA = $self->{magic_data};
                $io->close;
                my $json = $self->as_json || return;
                my $fh = IO::File->new( ">$path" ) || 
                    return( $self->error( "Unable to write to magic cache json data file \"$path\": $!" ) );
                $fh->binmode;
                $fh->print( $json );
                $fh->close;
            }
            $MAGIC_DATA_SOURCE = $file_abs;
        }
    }
    elsif( $MAGIC_DATA && scalar( @$MAGIC_DATA ) )
    {
        $self->{magic_data} = $MAGIC_DATA;
    }
    else
    {
        $file = __FILE__;
        $file =~ s/\.pm/\.json/;
        return( $self->error( "Apache2::SSI magic file \"$file\" does not exist." ) ) if( !-e( $file ) );
        $load_json_data->( $file ) || return;
    }
    
    # From the BSD names.h, some tokens for hard-coded checks of different texts.
    # This isn't rocket science. It's prone to failure so these checks are only a last resort.
    $self->{SPECIALS} = 
    {
        'message/rfc822' => 
            [
            '^Received:',   
            '^>From ',       
            '^From ',       
            '^To: ',
            '^Return-Path: ',
            '^Cc: ',
            '^X-Mailer: '
            ],
        'message/news' => 
            [
            '^Newsgroups: ', 
            '^Path: ',       
            '^X-Newsreader: '
            ],
        'text/html' => 
            [
            '<html[^>]*>',
            '<HTML[^>]*>',
            '<head[^>]*>',
            '<HEAD[^>]*>',
            '<body[^>]*>',
            '<BODY[^>]*>',
            '<title[^>]*>',
            '<TITLE[^>]*>',
            '<h1[^>]*>',
            '<H1[^>]*>',
            ],
        'text/x-roff' => 
            [
            "^\\.SH",
            "^\\.PP",
            "^\\.TH",
            "^\\.BR",
            "^\\.SS",
            "^\\.TP",
            "^\\.IR",
            ],
    };

    $self->{FILE_EXTS} = 
    {
        qr/\.gz$/   => 'application/x-gzip',
        qr/\.bz2$/  => 'application/x-bzip2',
        qr/\.Z$/    => 'application/x-compress',
        qr/\.txt$/  => 'text/plain',
        qr/\.html$/ => 'text/html',
        qr/\.htm$/  => 'text/html',
    };
    return( $self );
}

sub as_json
{
    my $self = shift( @_ );
    my $data = $self->{magic_data};
    my $j = JSON->new->relaxed->allow_nonref;
    my $json = $j->pretty->encode( $data );
    return( $json );
}

sub check
{
    my $self = shift( @_ );
    my $file = shift( @_ );
    my $prev  = $self->check_magic;
    $self->check_magic( 1 );
    my $io = IO::File->new( "<$file" ) || return( $self->error( "Unable to open magic file \"$file\": $!" ) );
    $io->binmode;
    $self->{magic}->{io} = $io;
    my $data = [];
    while( !$io->eof() )
    {
        $self->read_magic_entry( $data );
    }
    $io->close();
    $self->dump( $data );
    $self->check_magic( $prev );
    return( $self );
}

sub check_magic { return( shift->_set_get_boolean( 'check_magic', @_ ) ); }

sub data 
{
    my $self = shift( @_ );
    my $data = shift( @_ );
    my $type = '';
    
    if( length( $data ) <= 0 )
    {
        return( $self->{default_type} ? 'application/octet-stream' : '' );
    }
    
    $type = $self->with_magic( $data );
    
    # 4) Check if it's text or binary.
    # If it's text, then do a bunch of searching for special tokens
    if( !defined( $type ) ) 
    {
        $type = $self->with_data( $data );
    }
    if( !defined( $type ) )
    {
        $type = $self->{default_type} ? 'text/plain' : '';
    }
    return( $type );
}

sub default_type { return( shift->_set_get_scalar( 'default_type', @_ ) ); }

# Recursively write the magic file to stderr.
# Numbers are written in decimal.
sub dump
{
    my $self  = shift( @_ );
    my $data  = shift( @_ ) || $self->{magic_data};
    my $depth = shift( @_ );
    $data  = [] unless( defined( $data ) );
    $depth = 0 unless( defined( $depth ) );
    our $err = IO::File->new;
    $err->autoflush( 1 );
    $err->fdopen( fileno( STDERR ), 'w' ) || return( $self->error( "Cannot write to STDERR: $!" ) );
    $err->binmode;

    foreach my $entry ( @$data )
    {
        # Delayed evaluation.
        $entry = $self->parse_magic_line( @$entry ) if( scalar( @$entry ) == 3 );
        next if( !defined( $entry ) );
        my( $offtype, $offset, $numbytes, $type, $mask, $op, $testval, $template, $message, $subtests ) = @$entry;
        $err->print( '>' x $depth );
        if( $offtype == 1 ) 
        {
            $offset->[2] =~ tr/c/b/;
            $err->printf( "(%s.%s%s)", $offset->[0], $offset->[2], $offset->[3] );
        }
        elsif( $offtype == 2 ) 
        {
            $err->print( "&", $offset );
        }
        else 
        {
            # offtype == 0
            $err->print( $offset );
        }
        $err->print( "\t", $type );
        if( $mask ) 
        {
            $err->print( "&", $mask );
        }
        $err->print( "\t", $op, $testval, "\t", $message, "\n" );
    
        if( $subtests ) 
        {
            $self->dump( $subtests, $depth + 1 );
        }
    }
}

sub error_returns_undef { return( shift->_set_get_boolean( 'error_returns_undef', @_ ) ); }

sub file 
{
    my $self = shift( @_ );
    # The description line. append info to this string
    my $desc = '';
    my $type = '';
    # Iterate over each file explicitly so we can seek
    my $file = shift( @_ ) || do
    {
        if( $self->{error_returns_undef} )
        {
            return( $self->error( "Missing file arguement. Usage: \$magic->file( \$some_file_name )" ) );
        }
        else
        {
            $desc .= "no file provided.";
            return( "x-system/x-error; $desc" );
        }
    };
    
    # No need to let everybody know what is our server file system
    my $base_file = File::Basename::basename( $file );

lib/Apache2/SSI/File/Type.pm  view on Meta::CPAN

    }
    # 1) Check permission
    elsif( !-r( $file ) ) 
    {
        if( $self->{error_returns_undef} )
        {
            return( $self->error( "Unable to read file '$file'; lacking permission" ) );
        }
        else
        {
            $desc .= "unable to read '$base_file': Permission denied.";
            return( "x-system/x-error; $desc" );
        }
    }
    
    # 2) Check for various special files first
    if( $self->follow_links ) 
    {
        CORE::stat( $file ); 
    } 
    else 
    {
        CORE::lstat( $file );
    }
    # Avoid doing many useless redondant system stat, use '_'. See perlfunc man page
    if( !-f( _ ) || -z( _ ) ) 
    {
        if( !$self->follow_links && -l( _ ) ) 
        { 
            #$desc .= " symbolic link to ". readlink( $file );
            return( 'application/x-link' );
        }
        elsif( -d( _ ) ) { return( 'application/x-directory' ); }
        # Named pipe
        elsif( -p( _ ) ) { return( 'application/x-pipe' ); }
        elsif( -S( _ ) ) { return( 'application/x-socket' ); }
        # Block special file
        elsif( -b( _ ) ) { return( 'application/x-block' ); }
        # Character special file
        elsif( -c( _ ) ) { return( 'application/x-character' ); }
        elsif( -z( _ ) ) { return( 'application/x-empty' ); }
        else 
        {
            return( $self->{default_type} ? $self->{default_type} : 'application/x-unknown' );
        }
    }
    
    # Current file handle. or undef if check_magic (-c option) is true.
    my $io;
    $io = IO::File->new( "<$file" ) || do
    {
        if( $self->{error_returns_undef} )
        {
            return( $self->error( "Unable to open file '$file': $!" ) );
        }
        else
        {
            return( "x-system/x-error; $base_file: $!" );
        }
    };
    $io->binmode;
    
    # 3) Check for script
    # if( ( -x( $file ) || ( $^O =~ /^(dos|mswin32|NetWare|symbian|win32)$/i && $file =~ /\.(?:pl|cgi)$/ ) ) && 
#     if( ( -x( $file ) || $file =~ /\.(?:cgi|pl|t)$/ ) && 
#         -T( _ ) ) 
    my $default;
    if( -x( $file ) && -T( _ ) ) 
    {
        # Note, some magic files include elaborate attempts to match #! header lines 
        # and return pretty responses but this slows down matching and is unnecessary.
        my $line1 = $io->getline;
        if( $line1 =~ /^\#![[:blank:]\h]*(\S+)/ ) 
        {
            # Returns the binary name, without file path
            my $bin_name = File::Basename::basename( $1 );
            #$desc .= " executable $bin_name script text";
            # $io->close;
            # return( "text/x-${bin_name}" );
            $default = "text/x-${bin_name}";
        }
    }
    my $out = $self->handle( $io, $desc, { default => $default } );
    $io->close;
    return( $out );
}

sub follow_links { return( shift->_set_get_boolean( 'follow_links', @_ ) ); }

sub handle 
{
    my $self = shift( @_ );
    my $io = shift( @_ );
    my $desc = shift( @_ );
    my $opts = $self->_get_args_as_hash( @_ );
    $opts->{default} = $self->default_type if( !length( $opts->{default} ) );
    my $type = '';
    
    # 3) Iterate over each magic entry.
    my $match_found = 0;
    for( my $m = 0; $m <= $#{ $self->{magic_data} }; $m++ ) 
    {
        # Check if the m-th magic entry matches and if it does, then $desc will contain 
        # an updated description
        my $test;
        if( ( $test = $self->_magic_match( $self->{magic_data}->[$m], \$desc, $io ) ) ) 
        {
            if( defined( $desc ) && $desc ne '' ) 
            {
                $match_found = 1;
                $type = $desc;
                last;
            }
        }
        elsif( !defined( $test ) )
        {
            warnings::warn( "Error occurred while checking for match: ", $self->error ) if( warnings::enabled() && $self->debug );
        }
    
        # Read another entry from the magic file if we've exhausted all the entries 
        # already buffered. read_magic_entry will add to the end of the array 



( run in 1.996 second using v1.01-cache-2.11-cpan-39bf76dae61 )