Apache-RewritingProxy
view release on metacpan or search on metacpan
RewritingProxy.pm view on Meta::CPAN
package Apache::RewritingProxy;
use strict;
use Apache::Constants qw(:common);
use vars '$req';
use vars '$res';
use vars '$proxiedCookieJar';
use vars '$replayCookies';
use vars '$serverCookies';
use vars '$jar';
use vars '$textHandlerSub';
use vars qw($VERSION @ISA @EXPORT);
$|=1;
$VERSION = '0.7';
# use DynaLoader ();
# @ISA = qw(DynaLoader Exporter);
use Exporter();
@ISA = qw(Exporter);
@EXPORT = qw(new handler fixLink fetchURL);
# This is the directory in which cacheing will eventually take
# place. More importantly, a subdirectory of this named cookies
# MUST exist and be writeable by the web server. This is where users'
# cookie jars are stored.
$Apache::RewritingProxy::cacheRoot = '/web/httpd/RewritingProxy';
sub new
{
my $class = shift;
my $self = {};
bless $self,$class;
return $self;
}
sub handler
{
my $r = shift;
$textHandlerSub = shift;
# Find the URL we are to fetch...
my $urlToFetch = substr($r->path_info,1);
# I only know one protocol thus far.
return DECLINED if ($urlToFetch !~ /^http:/);
# Get the resource and shovel it out to the client.
&fetchURL($r, $urlToFetch);
# If nothing has happened thus far, we'll assume that's good.
return OK;
}
##################################
# sub fetchURL
#
# Parameters:
#
# $r - an Apache request object
# $url - the URL to fetch and process.
#
# Returns:
#
# OK if it's happy.
# an HTTP response code if one other than 200
# is received.
sub fetchURL
{
# This is the guy who actually grabs the page and then parses it.
# My goal is to find all of the links made in the urlToFetch
# and rewrite them to be absolute links passing through this module
# again.
use Apache::Util qw(:all);
use LWP::UserAgent;
use HTML::TokeParser;
use HTTP::Cookies;
use CGI;
my $r = shift;
my $url = shift;
my $ua = new LWP::UserAgent;
# As we form the request to go to the remote server,
# We should stuff any cookies that might be relavant
# into the request. We need to use the Table class
# here to fetch the cookies and see what cookies
# apply to $url. We then sent those cookies
# in the request after yanking out our own URL from
# the cookies.
# Fetch a cookie named RewritingProxy from the client.
my $cookieKey;
my $clientCookies = $r->header_in('Cookie');
my @clientCookiePairs = split (/; /, $clientCookies);
my $thisClientCookiePair;
foreach $thisClientCookiePair (@clientCookiePairs)
{
my ($name,$value) = split (/=/, $thisClientCookiePair);
$cookieKey = $value if ($name eq "RewritingProxyCookieJar");
}
# Set the cookie to be the client's current IP (doesn' really matter).
# Set the cookie to expire in 6 months.
# TODO: Make this thing refresh if a client keeps using the proxy.
if (!$cookieKey)
{
$cookieKey = $r->get_remote_host();
my $cookieString = "RewritingProxyCookieJar=$cookieKey; expires=".
ht_time(time+518400). "; path=/; domain=".$r->get_server_name;
$r->header_out('Set-Cookie'=>$cookieString);
}
# We now need to open the User's cookie jar and see if any cookies
# need to be sent to this particular server.
$jar = "$Apache::RewritingProxy::cacheRoot/cookies/$cookieKey";
$serverCookies = HTTP::Cookies->new(
File => "$jar",
ignore_discard=>1,
AutoSave=>1);
# Load the cookies into memory...
$serverCookies->load() if (-e $jar);
# Let's take care of Referer also.
my $referer = $r->header_in('Referer');
my $script_name = $r->location;
$referer =~ s/(.*$script_name\/)//i;
# Let's carry the User Agent to the server also.
# TODO: We need to include the proxied via header here.
my $browser = $r->header_in('User-Agent');
$ua->agent($browser);
# We have to append the query string since it got munged by
# apache when this was first requested.
my $rurl = $url;
$rurl .= "?". $r->args if ($r->args && $url !~ /\?/);
if ($r->method eq 'GET')
{
$req = new HTTP::Request 'GET' => "$rurl";
$req->header('Referer'=>"$referer");
$serverCookies->add_cookie_header($req);
# This needs to be a simple request or else the redirects will
# not work very nicely. LWP is too smart sometimes.
$res = $ua->simple_request($req);
}
elsif ($r->method eq 'POST')
{
# This is a little bit of tricky ju ju here.
# We will use another PERLy package to
# prepare the URL and pack in the encoded form data.
use URI::URL;
my %FORM;
$req = new HTTP::Request 'POST' => "$rurl";
$req->header('Referer'=>"$referer");
$req->content_type('application/x-www-form-urlencoded');
$serverCookies->add_cookie_header($req);
# $req->content('$buffer');
my $pair;
my @pairs = split (/&/, $r->content);
# TODO: This next bit more efficiently.
# It works for the occasional cgi, but not for constant
# hammering away at this code. There has to be a better
# and more OOP way.
foreach $pair (@pairs)
{
my ($name, $value) = split (/=/, $pair);
# Un-Webify plus signs and %-encoding
$value =~ tr/+/ /;
$value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
$FORM{$name} = $value;
}
# Now we build the new URL structure with the form data
# to be sent to the server on the client's behalf.
my $curl = url("http:");
$curl->query_form(%FORM);
$req->content($curl->equery);
$res = $ua->simple_request($req);
}
# We need to store any cookies the server sent to us for future use.
#Old cookie jar...
# TODO: Make this much much better. We still need to lock the cookie
# jar to keep simultaneous requests from killing each others' changes.
while (-e "$Apache::RewritingProxy::cacheRoot/cookies/$cookieKey.lock")
{sleep(1);}
open (LOCK, ">$Apache::RewritingProxy::cacheRoot/cookies/$cookieKey.lock");
print LOCK " ";
close LOCK;
# New cookies sent by the server...
my $responseCookies = HTTP::Cookies->new;
$responseCookies->extract_cookies($res);
$responseCookies->scan(\&storeCookies);
# Store the old plus the new...
# $proxiedCookieJar->save();
unlink ("$Apache::RewritingProxy::cacheRoot/cookies/$cookieKey.lock");
sub storeCookies
{
my $version = shift;
my $key = shift;
my $val = shift;
my $path = shift;
my $domain = shift;
my $port = shift;
my $path_spec = shift;
my $secure = shift;
my $expires = shift;
my $discard = shift;
my $hash = shift;
# if (!$expires )
# {
# $expires = ht_time(time+3600, '%Y-%m-%d %H:%M:%S',0);
# }
my $proxiedCookieJar = HTTP::Cookies->new(
File => "$jar",
ignore_discard=>1,
AutoSave=>1);
$proxiedCookieJar->load();
$proxiedCookieJar->set_cookie($version,$key,$val,$path,
$domain,$port,$path_spec,$secure,$expires);
$proxiedCookieJar->save();
}
if ($res->code =~ /^3/)
{
# This means it was a server redirect.
# We should process the headers and insert
# ourselves into the headers everywhere we need to.
my $textHeaders = $res->headers_as_string;
# We need the address of the current script.
my ($tmpUri,$junk) = split (/\/http/i, $r->uri);
my $script_home = $r->get_server_name .":".
$r->server->port . $tmpUri;
# Replace any redirect links with a link pointing through us.
if ($textHeaders =~ /Location: http:/i)
{
$textHeaders =~ s#http:#http://$script_home/http:#i;
}
else
{
$textHeaders =~ s#Location: (.*)
#Location: http://$script_home/$url$1#i;
}
# Dump out the headers as though we had created them.
# Nothing like a little bit of http-plagiarism.
$r->send_cgi_header($textHeaders);
}
# We only process html documents. Maybe someday we will
# work on other types, but there is no need right now since
# this program only wants to look at web pages anyhow.
if ($res->content_type =~ /html/i)
{
my $content = $res->content; # The actual text
my $outString = ""; # The content the user sees
my $baseHref = ""; # storage space for <base
my $p = HTML::TokeParser->new(\$content)
|| $r->log_error("No Content: $!");
# TODO: This needs to be changed from warn!
while (my $tolkens = $p->get_token )
{
my $text = "";
# We process all of the possible token types.
# text and comments are printed unmolested to the browser.
# Javascript would have to be parsed out by editing the text
# between script tags.
if ($tolkens->[0] eq 'T')
{
if ($textHandlerSub)
{
$outString .= &{$textHandlerSub}($r,$tolkens->[1]);
}
else
{
$outString .= mainTextHandler($r,$tolkens->[1]);
}
# $outString .= $tolkens->[1];
( run in 0.479 second using v1.01-cache-2.11-cpan-22024b96cdf )