Apache2-SSI

 view release on metacpan or  search on metacpan

lib/Apache2/SSI/URI.pm  view on Meta::CPAN

    debug => $self->debug,
    path_info => '',
    query_string => '',
    _path_info_processed => 1,
    };
    $hash->{document_path} = $hash->{document_uri};
    $hash->{apache_request} = $self->apache_request if( $self->apache_request );
    my $root = bless( $hash => ref( $self ) );
    # Scalar::Util::weaken( $copy );
    $root->{base_dir} = $root;
    $root->{base_uri} = $root;
    $self->{root} = $root;
    return( $root );
}

# shortcut
sub uri { return( shift->document_uri( @_ ) ); }

# Path info works as a path added to a document uri, such as:
# /my/doc.html/path/info
# But we need to distinguish with missing document hierarchy inside a directory, such as:
# /my/folder/missing_doc.html/path/info
# otherwise we would be treating /missing_doc.html/path/info as a path info
sub _find_path_info
{
    my $self = shift( @_ );
    my( $uri_path, $doc_root ) = @_;
    $doc_root //= $self->document_root;
    my $qs = '';
    my $sep = $DIR_SEP;
    $sep = '/' if( !length( $sep ) );
    if( Scalar::Util::blessed( $uri_path ) && $uri_path->isa( 'URI::file' ) )
    {
        $uri_path = $uri_path->file;
    }
    my $u = $self->collapse_dots( $uri_path );
    $qs = $u->query;
    $uri_path = $u->path;
    # Pass the OS to ensure we get ./ss/include.cgi becomes .\ssi\include.cgi
    my $path = URI::file->new( $uri_path )->file( $^O );
    $doc_root = $doc_root->file( $^O ) if( Scalar::Util::blessed( $doc_root ) && $doc_root->isa( 'URI::file' ) );
    $doc_root = substr( $doc_root, 0, length( $doc_root ) - length( $sep ) ) if( substr( $doc_root, -length( $sep ), length( $sep ) ) eq $sep );
    return( $self->error( "URI path must be an absolute path starting with '/'. Path provided was \"$uri_path\"." ) ) if( substr( $uri_path, 0, 1 ) ne '/' );
    # No need to go further
    if( -e( "${doc_root}${path}" ) )
    {
        return({
            filepath => "${doc_root}${path}",
            path => $uri_path,
            query_string => $qs,
            code => 200,
        });
    }
    elsif( $uri_path eq '/' )
    {
        return({
            filepath => $doc_root,
            path => $uri_path,
            path_info => undef(),
            query_string => $qs,
            code => ( -e( $doc_root ) ? 200 : 404 ),
        });
    }
    my @parts = split( '/', substr( $uri_path, 1 ) );
    my $trypath = '';
    my $trypath_uri = '';
    my $pathinfo = '';
    foreach my $p ( @parts )
    {
        # The last path was a directory, and we cannot find the element within. So, the rest of the path is not path info, but rather a 404 missing document hierarchy
        # We test the $pathinfo string, so we do not bother checking further if it is already set.
        if( !$pathinfo && -d( "${doc_root}${trypath}" ) && !-e( "${doc_root}${trypath}/${p}" ) )
        {
            # We return the original path provided (minus any query string)
            return({
                filepath => $doc_root . ( length( $trypath ) ? $trypath :  $path ),
                path => $uri_path,
                code => 404,
                query_string => $qs,
            });
        }
        elsif( !$pathinfo && -e( "${doc_root}${trypath}/${p}" ) )
        {
            $trypath_uri .= "/${p}";
            $trypath  .= "${sep}${p}";
        }
        else
        {
            $pathinfo .= "/$p";
        }
    }
    return({
        filepath => "${doc_root}${trypath}",
        path => $trypath_uri,
        path_info => $pathinfo,
        code => 200,
        query_string => $qs,
    });
}

# *_set_env = \&Apache2::SSI::_set_env;
# This is different from the env() method. This one is obviously private
# whereas the env() one has triggers that could otherwise create an infinite loop.
sub _set_env
{
    my $self = shift( @_ );
    my $name = shift( @_ );
    return( $self->error( "No environment variable name provided." ) ) if( !length( $name ) );
    $self->{_env} = {} if( !ref( $self->{_env} ) );
    my $env = $self->{_env};
    my $r = $self->apache_request;
    if( @_ )
    {
        my $v = shift( @_ );
        $r->subprocess_env( $name => $v ) if( $r );
        $env->{ $name } = $v;
    }
    return( $self );
}

sub _trim_trailing_slash
{
    my $self = shift( @_ );
    my $uri  = shift( @_ );
    return( $self->error( "No uri provided to trim trailing slash." ) ) if( !length( "$uri" ) );
    unless( Scalar::Util::blessed( $uri ) && $uri->isa( 'URI' ) )
    {
        $uri = $self->new_uri( "$uri" );
    }
    if( substr( $uri->path, -1, 1 ) eq '/' && length( $uri->path ) > 1 )
    {
        # By splitting the string on '/' and without the last argument for split being -1, perl will remove trailing blank entries
        $uri->path( join( '/', split( '/', $uri->path ) ) );
    }
    return( $uri );
}

1;

lib/Apache2/SSI/URI.pm  view on Meta::CPAN

    # Changing the base uri, which is used to resolve relative uri
    $uri->base_uri( '/ssi' );

    my $uri2 = $uri->clone;
    $uri2->filename( '/home/john/some-file.txt' );
    die( "No such file\n" ) if( $uri2->finfo->filetype == Apache2::SSI::Finfo::FILETYPE_NOFILE );

    my $dir = $uri->document_directory;

    # Full path to the filename, e.g. /home/john/www/some/dir/file.html
    # Possible dots are resolved /home/john/www/some/dir/../ssi/../dir/./file.html => /home/john/www/some/dir/file.html
    my $filename = $uri->document_filename;

    # The uri without path info or query string
    my $path = $uri->document_path;

    my $doc_root = $uri->document_root;
    
    # The document uri including path info, and query string if any
    my $u = $uri->document_uri;

    my $req_uri = $uri->env( 'REQUEST_URI' );

    # Access to the Apache2::SSI::Finfo object
    my $finfo = $uri->finfo;

    # A new Apache2::SSI::URI object
    my $uri3 = $uri->new_uri( document_uri => '/some/where/about.html', document_root => '/home/john/www' );

    # Returns /some/uri
    my $parent = $uri->parent;

    # The uri is now /some/uri/file.html/some/path
    $uri->path_info( '/some/path' );

    # The uri is now /some/uri/file.html/some/path?q=something&l=ja_JP
    $uri->query_string( 'q=something&l=ja_JP' );

    my $html = $uri->slurp_utf8;
    my $raw = $uri->slurp({ binmode => ':raw' });

    # Same as $uri->document_uri
    my $uri = $uri->uri;

=head1 VERSION

    v0.1.3

=head1 DESCRIPTION

L<Apache2::SSI::URI> is used to manipulate and query http uri. It is used by L<Apache2::SSI> both for the main query, and also for sub queries like when there is an C<include> directive.

In this case, there would be the main document uri such as C</some/path/file.html> and containing a directive such as:

    <!--#include virtual="../other.html" -->

An L<Apache2::SSI::URI> object would be instantiated to process the uri C<../other.html>, flatten the dots and get its underlying filename.

Even if the uri provided does not exist, am L<Apache2::SSI::URI> object would still be returned, so you need to check if the file exists by doing:

    if( $uri->code == 404 )
    {
        die( "Not there\n" );
    }

Or, this would work too:

    if( $uri->finfo->filetype == Apache2::SSI::Finfo::FILETYPE_NOFILE )
    {
        die( "No such file !\n" );
    }

=head1 METHODS

=head2 new

This instantiate an object that is used to access other key methods. It takes the following parameters:

=over 4

=item C<apache_request>

This is the L<Apache2::RequestRec> object that is provided if running under mod_perl.

it can be retrieved from L<Apache2::RequestUtil/request> or via L<Apache2::Filter/r>

You can get this L<Apache2::RequestRec> object by requiring L<Apache2::RequestUtil> and calling its class method L<Apache2::RequestUtil/request> such as C<Apache2::RequestUtil->request> and assuming you have set C<PerlOptions +GlobalRequest> in your ...

Note that there is a main request object and subprocess request object, so to find out which one you are dealing with, use L<Apache2::RequestUtil/is_initial_req>, such as:

    use Apache2::RequestUtil (); # extends Apache2::RequestRec objects
    my $r = $r->is_initial_req ? $r : $r->main;

=item C<base_uri>

This is the base uri which is used to make uri absolute.

For example, if the main document uri is C</some/folder/file.html> containing a directive:

    <!--#include virtual="../other.html" -->

One would instantiate an object using C</some/folder/file.html> as the base_uri like this:

    my $uri = Apache2::SSI::URI->new(
        base_uri => '/some/folder/file.html',
        apache_request => $r,
        document_uri => '../other.html',
        # No need to specify document_root, because it will be derived from 
        # the Apache2::RequestRec provided with the apache_request parameter.
    );

=item C<document_root>

This is only necessary to be provided if this is not running under Apache mod_perl. Without this value, L<Apache2::SSI> has no way to guess the document root and will not be able to function properly and will return an L</error>.

=item C<document_uri>

This is only necessary to be provided if this is not running under Apache mod_perl. This must be the uri of the document being served, such as C</my/path/index.html>. So, if you are using this outside of the rim of Apache mod_perl and your file resid...

=back

=head2 apache_request

Sets or gets the L<Apache2::RequestRec> object. As explained in the L</new> method, you can get this Apache object by requiring the package L<Apache2::RequestUtil> and calling L<Apache2::RequestUtil/request> such as C<Apache2::RequestUtil->request> a...

When running under Apache mod_perl this is set automatically from the special L</handler> method, such as:

    my $r = $f->r; # $f is the Apache2::Filter object provided by Apache

=for Pod::Coverage base_dir

=head2 base_uri

Sets or gets the base reference uri. This is used to render the L</document_uri> provided an absolute uri.

=head2 clone

Create a clone of the object and return it.

=head2 code

Sets or gets the http code for this uri.

    $uri->code( 404 );

=head2 collapse_dots

Provided with an uri, and this will resolve the path and removing the dots, such as C<.> and C<..> and return an L<URI> object.

This is done as per the L<RFC 3986 section 5.2.4 algorithm|https://tools.ietf.org/html/rfc3986#page-33>

    my $uri = $ssi->collapse_dots( '/../a/b/../c/./d.html' );
    # would become /a/c/d.html
    my $uri = $ssi->collapse_dots( '/../a/b/../c/./d.html?foo=../bar' );
    # would become /a/c/d.html?foo=../bar
    $uri->query # foo=../bar

=for Pod::Coverage document_dir

=head2 document_directory

Returns an L<Apache2::SSI::URI> object of the current directory of the L</document_uri> provided.

This can also be called as C<$uri->document_dir>

=head2 document_filename

This is an alias for L<Apache2::SSI::URI/filename>

=head2 document_path

Sets or gets the uri path to the document. This is the same as L</document_uri>, except it is striped from L</query_string> and L</path_info>.

=head2 document_root

Sets or gets the document root.

Wen running under Apache mod_perl, this value will be available automatically, using L<Apache2::RequestRec/document_root> method.

If it runs outside of Apache, this will use the value provided upon instantiating the object and passing the C<document_root> parameter. If this is not set, it will return the value of the environment variable C<DOCUMENT_ROOT>.

=head2 document_uri

Sets or gets the document uri, which is the uri of the document being processed.

For example:

    /index.html

Under Apache, this will get the environment variable C<DOCUMENT_URI> or calls the L<Apache2::RequestRec/uri> method.

Outside of Apache, this will rely on a value being provided upon instantiating an object, or the environment variable C<DOCUMENT_URI> be present.

The value should be an absolute uri.

=head2 env

Sets or gets environment variables that are distinct for this uri.

    $uri->env( REQUEST_URI => '/some/path/file.html' );
    my $loc = $uri->env( 'REQUEST_URI' );

If it is called without any parameters, it returns all the environment variables as a hash reference:



( run in 1.811 second using v1.01-cache-2.11-cpan-39bf76dae61 )