Apache-ProxyRewrite

 view release on metacpan or  search on metacpan

ProxyRewrite.pm  view on Meta::CPAN

# the minimum extent necessary to make such provision valid and
# enforceable.
#
# If Recipient institutes patent litigation against a Contributor with
# respect to a patent applicable to software (including a cross-claim
# or counterclaim in a lawsuit), then any patent licenses granted by
# that Contributor to such Recipient under this Agreement shall
# terminate as of the date such litigation is filed. In addition, If
# Recipient institutes patent litigation against any entity (including
# a cross-claim or counterclaim in a lawsuit) alleging that the
# Program itself (excluding combinations of the Program with other
# software or hardware) infringes such Recipient's patent(s), then
# such Recipient's rights granted under Section 2(b) shall terminate
# as of the date such litigation is filed.
#
# All Recipient's rights under this Agreement shall terminate if it
# fails to comply with any of the material terms or conditions of this
# Agreement and does not cure such failure in a reasonable period of
# time after becoming aware of such noncompliance. If all Recipient's
# rights under this Agreement terminate, Recipient agrees to cease use
# and distribution of the Program as soon as reasonably practicable.
# However, Recipient's obligations under this Agreement and any
# licenses granted by Recipient relating to the Program shall continue
# and survive.
#
# IBM may publish new versions (including revisions) of this Agreement
# from time to time. Each new version of the Agreement will be given a
# distinguishing version number. The Program (including Contributions)
# may always be distributed subject to the version of the Agreement
# under which it was received. In addition, after a new version of the
# Agreement is published, Contributor may elect to distribute the
# Program (including its Contributions) under the new version. No one
# other than IBM has the right to modify this Agreement. Except as
# expressly stated in Sections 2(a) and 2(b) above, Recipient receives
# no rights or licenses to the intellectual property of any
# Contributor under this Agreement, whether expressly, by implication,
# estoppel or otherwise. All rights in the Program not expressly
# granted under this Agreement are reserved.
#
# This Agreement is governed by the laws of the State of New York and
# the intellectual property laws of the United States of America. No
# party to this Agreement will bring a legal action under this
# Agreement more than one year after the cause of action arose. Each
# party waives its rights to a jury trial in any resulting litigation.
#
###############################################################################


# Package name
package Apache::ProxyRewrite;


# Required libraries
use strict;
use Apache;
use Apache::Constants qw(OK AUTH_REQUIRED DECLINED DONE);
use Apache::Log;
use Apache::URI;
use LWP::UserAgent;
use Socket;
use URI::Escape qw(uri_unescape);


# Global variables
$Apache::ProxyRewrite::VERSION = '0.17';
$Apache::ProxyRewrite::PRODUCT = 'ProxyRewrite/' .
  $Apache::ProxyRewrite::VERSION;
my %LINK_ELEMENTS =
( # These represent all the possible valid tags that have links in them
 'a'       => 'href',
 'applet'  => {
               'archive'    => 1,
               'code'       => 1,
               'codebase'   => 1,
              },
 'area'    => 'href',
 'base'    => 'href',
 'body'    => 'background',
 'embed'   => 'src',
 'form'    => 'action',
 'frame'   => 'src',
 'img'     => {
               'src'        => 1,
               'lowsrc'     => 1,
               'usemap'     => 1,
              },
 'input'   => 'src',
 'isindex' => 'action',
 'link'    => {
               'href'       => 1,
               'src'        => 1,
              },
 'meta'    => {
               'content'    => 1,
               'http-equiv' => 1,
              },
 'object'  => {
               'classid'    => 1,
               'codebase'   => 1,
               'data'       => 1,
               'name'       => 1,
               'usemap'     => 1,
              },
 'script'  => 'src',
 'td'      => 'background',
 'th'      => 'background',
 'tr'      => 'background',
);


###############################################################################
###############################################################################
# handler: hook into Apache/mod_perl API
###############################################################################
###############################################################################
sub handler {
  my $r = shift;
  my %mappings = ();
  my ($auth_info, $auth_redirect, $remote_location) = undef;

  %mappings = split(/\s*(?:=>|,)\s*/, $r->dir_config('ProxyRewrite'));

ProxyRewrite.pm  view on Meta::CPAN

  # fetch URL
  $r->log->info("ProxyRewrite: Preparing to fetch ", $r->uri,
		" at time ", time);
  my $response = &fetch($r, $remote_location, $remote_site,
			$auth_info, \%mappings);

  # rewrite response URIs as needed
  $r->log->info("ProxyRewrite: Preparing to rewrite URIs for ", $r->uri,
		" at time ", time);
  if ($response->header('Content-type') =~ m!^text/html!) {
    &parse($r, $remote_site, $response, \%mappings);
  }

  # respond to client
  $r->log->info("ProxyRewrite: Preparing to respond for ", $r->uri,
		" at time ", time);
  &respond($r, $remote_site, $remote_location, $auth_redirect,
	   $response, \%mappings);

  return OK;
}

###############################################################################
###############################################################################
# fetch: fetch the remote URL and return a reference to the response object
###############################################################################
###############################################################################
sub fetch {
  my ($r, $remote_location, $remote_site, $auth_info, $mapref) = @_;
  my $client_agent = '';
  my $my_uri = '';
  my ($k, $v);
  my $base = $r->location();
  my $args = $r->args();
  if ($base ne '/') {
    ($my_uri = $r->uri) =~ s/^$base//;
  } else {
    $my_uri = $r->uri;
  }
  $my_uri = $remote_location . $my_uri;
  $my_uri .= '?' . $r->args() if $args;

  my $request = HTTP::Request->new($r->method, $my_uri);

  $r->log->info("ProxyRewrite::fetch: Time proxy request method created: ", time);
  $r->log->debug("fetch: Base URI (aka location section): $base");
  $r->log->info("ProxyRewrite::fetch: Request for $my_uri with method ", $r->method);

  my(%headers_in) = $r->headers_in;
  while(($k,$v) = each %headers_in) {
    # HACK to force no Keep-Alives on the connection between proxy
    # and remote server
    $r->log->debug("fetch: IN $k: $v");
    if ($k =~ /Connection/) {
      $v = "Close";
    } elsif ($k =~ /Host/) {
      ($v) = ($remote_location =~ m!://([^/]+)!);
    } elsif ($k =~ /User-Agent/) {
      $client_agent = $v;
    }
    $v = uri_unescape($v);
    $request->header($k,$v);	
    $r->log->debug("fetch: IN-MOD $k: $v");
  }

  # If we have authorization information and it isn't already filled in
  if ($auth_info && !$request->authorization()) {
    $request->authorization($auth_info);
  }

  if ($r->method eq "POST") {
    my $content;
    if ($r->headers_in->{'Content-type'} eq 'application/x-www-form-urlencoded') {
      $content = $r->content;
    } else {
      $r->read($content, $r->headers_in->{'Content-length'});
    }
    $request->content($content);
    $r->log->debug("fetch: Request type: ", $r->method);
    $r->log->debug("fetch: Request content type: ",
		   $r->headers_in->{'Content-type'});
    $r->log->debug("fetch: Request content: $content");
  }

  $r->log->debug("fetch: Product: $Apache::ProxyRewrite::PRODUCT");
  my $ua = new LWP::UserAgent;
  if ($client_agent ne '') {
    $ua->agent("$client_agent; $Apache::ProxyRewrite::PRODUCT");
  } else {
    $ua->agent("$Apache::ProxyRewrite::PRODUCT");
  }
  my $res = $ua->simple_request($request);
  $r->log->info("ProxyRewrite::fetch: Time proxy got document: ", time);
  $r->log->info("ProxyRewrite::fetch: Original document size: ",
		length($res->content));

  return($res);
}

###############################################################################
###############################################################################
# parse: parse HTML and find all embedded URLs
###############################################################################
###############################################################################
sub parse {
  my ($r, $remote_site, $response, $mapref) = @_;
  my $buf = $response->content;
  my ($lessthanpos, $greaterthanpos, $prediff, $diff,
      $preblock, $tagblock, $lastblock);
  my $pos = 0;
  my $newbuf = '';
  my $iscomment = 0;
  my $buflen = length($buf);

  while (($lessthanpos = index($buf, "<", $pos)) > -1) {
    # Make a special case out of the comment in case there
    # are nested tags within the comment, such as javascript code
    # fragments. Not necessarily our problem, but it doesn't hurt much
    # to deal with it.
    if (substr($buf, $lessthanpos + 1, 3) eq '!--') {
      $greaterthanpos = index($buf, "-->", $lessthanpos);



( run in 1.093 second using v1.01-cache-2.11-cpan-df04353d9ac )