Apache-Gateway
view release on metacpan or search on metacpan
229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305=item $gw = Apache::Gateway->new( [$ua] )
Construct a new Apache::Gateway object describing a gateway. If a
LWP::UserAgent is not provided, a new one will be created. Note: the
user agent is modified for seach request; it is not constant and is
probably not shareable.
=cut
sub
new($;$) {
my
$proto
=
shift
;
my
$class
=
ref
(
$proto
) ||
$proto
;
my
$self
= {};
$self
->{UA} =
@_
?
shift
: new LWP::UserAgent,
$self
->{CONFIG} = {};
bless
(
$self
,
$class
);
return
$self
;
}
=item $gw->user_agent( [$ua] )
Get/set the user agent.
=cut
sub
user_agent($;$) {
my
$self
=
shift
;
if
(
@_
) {
$self
->{UA} =
shift
}
return
$self
->{UA};
}
=item $gw->request( [$r] )
Get/set the Apache request currently being gatewayed. To send the
request, see the send_request method.
=cut
sub
request($;$) {
my
$self
=
shift
;
if
(
@_
) {
$self
->{REQUEST} =
shift
}
return
$self
->{REQUEST};
}
# $gw->_config( [$config] )
# Get/set the cached configuration information and current run state.
# This very low-level method is for hackers only. This API might
# change.
sub
_config($;$) {
my
$self
=
shift
;
if
(
@_
) {
$self
->{CONFIG} =
shift
}
return
$self
->{CONFIG};
}
=item $gw->location_config( [$config] )
Get/set the configuration information for this gateway location. Can
be overridden to provide dynamic per location information
=cut
sub
location_config($;$) {
my
$self
=
shift
;
my
$config_file
=
$self
->{REQUEST}->dir_config(
'GatewayConfig'
);
if
(
@_
) {
$self
->{CONFIG}{
$config_file
} =
shift
}
return
$self
->{CONFIG}{
$config_file
};
}
# $gw->_init_config_file
#
# If necessary, parse and cache a configuration file specified by the
# GatewayConfig variable. On error, sets
317318319320321322323324325326327328329330331332333334335336# ... }
# ROOT => location of root of gateway,
# TIMEOUT => timeout in seconds for contacting upstream server
# }
# site = a site URL, e.g., http://www.perl.com/CPAN/
# mux sites list = { START_INDEX => start index of round robin,
# SITE => [ site, site, ... ] }
# This structure is subject to change. Because it contains state
# information, it is per object and cannot be shared.
sub
_init_config_file($) {
my
$self
=
shift
;
my
$r
=
$self
->{REQUEST};
my
$config
=
$self
->{CONFIG};
my
$config_file
=
$r
->dir_config(
'GatewayConfig'
);
unless
(
$config_file
) {
$r
->log_error(
'no GatewayConfig'
);
$r
->status(HTTP::Status::RC_INTERNAL_SERVER_ERROR);
return
;
}
425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
return
1;
}
=item clear_headers_for_redirect($r)
Clear request headers in $r in preparation for a redirect.
=cut
sub
clear_headers_for_redirect($) {
my
$r
=
shift
;
# Some of this should be done with Apache::Tie when it is working.
$r
->header_out(
'Content-Length'
=>
undef
);
# should use tie
$r
->status(HTTP::Status::RC_OK);
my
%err
=
$r
->err_headers_out;
# should use tie
foreach
(
keys
%err
) {
$r
->err_header_out(
$_
=>
undef
);
}
}
=item canonicalized_server_URL($scheme, $hostname, $port)
Return semicanonicalized server URL (without trailing slash).
=cut
sub
canonicalized_server_URL($$$) {
my
(
$scheme
,
$host
,
$port
) =
@_
;
my
$server
=
lc
(
$scheme
.
'://'
.
$host
);
if
(
defined
$port
and
exists
$default_port
{
$scheme
}
and
$port
!=
$default_port
{
$scheme
}) {
$server
.=
':'
.
$port
;
}
return
$server
;
}
=item server_name_from_URL($r, $url)
Return the (somewhat canonicalized) "server name" portion of the URL.
The "server name" is defined as the leading scheme://authority portion
of the URL.
=cut
sub
server_name_from_URL($$) {
my
(
$r
,
$url
) =
@_
;
$url
= Apache::URI->parse(
$r
,
$url
)
unless
ref
$url
;
return
canonicalized_server_URL(
$url
->scheme,
$url
->hostname,
$url
->port);
}
=item server_name($r)
Return the (somewhat canonicalized) "server name" portion of the
URL of this server. The "server name" is defined as the leading
scheme://authority portion of the URL. Currently assumes server
access is via HTTP.
=cut
sub
server_name($) {
my
$r
=
shift
;
return
canonicalized_server_URL(
'http'
,
$r
->server->server_hostname,
$r
->server->port);
}
=item diff_TZ($origin_TZ, $mirror_TZ)
Get the usual time difference (in seconds) between the two time zones.
Will yield the wrong results in the midst of a change to/from daylight
savings time. Specifically, as used in this module, this function
will return the wrong results when applied to files retrieved by the
mirror during the two hours of the year when one server is in Daylight
Savings Time and the other is not.
=cut
sub
diff_TZ($$) {
my
(
$mirror_TZ
,
$origin_TZ
) =
@_
;
return
0
if
$origin_TZ
eq
$mirror_TZ
;
# no need to do anything
# Use Thu Jan 01 00:00:00 GMT 1998 as a reference time. No
# changes to/from Daylight Savings Time occurred near this time.
my
$reference_time
= 883612800;
return
Time::Zone::tz_offset(Time::Zone::tz2zone(
$mirror_TZ
),
$reference_time
)
520521522523524525526527528529530531532533534535536537538539540Update Via header in HTTP::Response
with
information about this hop.
Hop information combines protocol information from the message
with
server information from the B<Apache> server. The server name
returned is hardcoded as
'C<apache>'
.
Eventually, options should be provided to control hostname suppression
and comment customization.
=cut
sub update_via_header_field($$) {
my($self, $response) = @_;
my $r = $self->{REQUEST};
# Set protocol.
my $hop = $response->protocol;
# Oops. No protocol. Try to guess from request.
unless(defined $hop) {
$hop = (uc(Apache::URI->parse($r, $response->request->url)->scheme)
. '/unknown');
564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602Copy the headers from an C<HTTP::Headers> object to an
C<Apache::Request>. Hope that the B<Apache> request object will later
out the headers in
"Good Practice"
order (there appears to be
no
way of controlling this).
The only tricky item is the Content-Type header, which needs special
handling.
=cut
sub copy_header_to_Apache_request($$) {
my($r, $header) = @_;
# Apache might already know the proper content type, e.g., by use
# of a ForceType directive. If so, try not to override it. Else,
# the type needs to be set explicitly with the Apache request's
# content_type method: simply setting the header value isn't
# enough.
if(defined $r->content_type) {
$header->content_type(undef);
}
else {
$r->content_type($header->content_type);
}
# Copy headers to Apache request (in "Good Practice" order).
$header->scan(sub {$r->header_out(@_);});
}
sub print_headers($$$) {
my ($self, $response, $allow_abort) = @_;
my $r = $self->{REQUEST};
my $site = $self->{SITE};
my $path = $self->{GW_PATH};
# Copy status code and reason phrase from response to Apache
# request.
$r->status($1) if $response->status_line =~ /^(\d+)/;
$r->status_line($response->status_line);
634635636637638639640641642643644645646647648649650651652653654}
=item redirect($allow_abort);
Try a redirect. We do this via C<LWP::UserAgent> because
C<internal_redirect_handler> does not provide hooks for detecting and
recovering from errors.
=cut
sub
redirect($$) {
my
(
$self
,
$allow_abort
) =
@_
;
my
$r
=
$self
->{REQUEST};
my
$ua
=
$self
->{UA};
my
$site
=
$self
->{SITE};
my
$path
=
$self
->{GW_PATH};
my
$url
= Apache::URI->parse(
$r
,
$site
.
$path
);
# If this is an anon-FTP request, fill in the password with the
720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
unless
$headers_printed
||
$r
->connection->aborted;
}
=item $gw->site( [$site] )
Get/set the site tried. Can be used to determine which upstream
server actually fields a request.
=cut
sub
site($;$) {
my
$self
=
shift
;
if
(
@_
) {
$self
->{SITE} =
shift
}
return
$self
->{SITE};
}
=item $gw->try_URI($allow_abort)
Try the site $gw->site. Ideally, we could use
C<Apache::internal_redirect_handler> to try the redirects. However,
it provides no hook for detecting an error and aborting output.
That's not B<mod_perl>'s fault--B<Apache> source would need to be
modified to support such a hook.
=cut
sub
try_URI($$) {
my
(
$self
,
$allow_abort
) =
@_
;
clear_headers_for_redirect(
$self
->{REQUEST});
$self
->redirect(
$allow_abort
);
}
=item try_sites($allow_last_site_abort, @site)
Try sites in order until one succeeds. $allow_last_site_abort
indicates if the last site can/should be aborted after examing the
head for its error code. All other sites always allow premature
abortion.
Abortion is needed because only one request can be allowed to run to
completion and produce a message body.
=cut
sub
try_sites($$@) {
my
(
$self
,
$allow_last_site_abort
,
@site
) =
@_
;
my
$r
=
$self
->{REQUEST};
# Try all but last site, aborting each attempt on error.
for
(
my
$i
= 0;
$i
<=
$#site
; ++
$i
) {
if
(
ref
$site
[
$i
]) {
# Try this group of sites, starting at index $idx.
my
$mux_site
=
$site
[
$i
];
my
$idx
=
$mux_site
->{START_INDEX};
786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875
}
# We can exit if the last attempt succeeded or if the client
# is no longer talking to us.
return
if
(!HTTP::Status::is_error(
$r
->status)
||
$r
->connection->aborted);
}
}
# Set up the user agent for this particular request.
sub
_init_ua($) {
my
$self
=
shift
;
my
$r
=
$self
->{REQUEST};
my
$ua
=
$self
->{UA};
$ua
->from(
$r
->server->server_admin);
$ua
->agent(
$r
->header_in(
'User-Agent'
));
$ua
->timeout(
$self
->location_config->{TIMEOUT});
return
1;
# succeeded
}
# Set $self->{GW_PATH} to the portion of the path relative to
# GatewayRoot. This is also the path which is appended to the URIs of
# the upstream servers.
sub
_init_path($) {
my
$self
=
shift
;
my
$r
=
$self
->{REQUEST};
# epath = $gw_root . $gw_path
my
$gw_root
=
$self
->location_config->{ROOT};
my
(
$gw_path
) =
$r
->parsed_uri->path =~ /^\Q
$gw_root
\E(.*)/;
unless
(
defined
$gw_path
) {
# error
$r
->log_error(
$r
->uri .
' does not begin with '
.
$gw_root
);
$r
->status(HTTP::Status::RC_INTERNAL_SERVER_ERROR);
return
;
}
$self
->{GW_PATH} =
$gw_path
;
# succeeded
return
1;
}
sub
_init_request($) {
my
$self
=
shift
;
$self
->_init_config_file or
return
;
$self
->_init_ua or
return
;
$self
->_init_path or
return
;
return
1;
# succeeded
}
=item $gw->site_list
Get the list of sites to try for this request. Can be overridden to
customize the list of sites to try.
By default, this method looks through the LocationMatch sections in
the GatewayConfig file in order and returns the sites in the first
section matched.
=cut
sub
site_list($) {
my
$self
=
shift
;
my
$location_conf
=
$self
->location_config;
my
$gw_path
=
$self
->{GW_PATH};
foreach
my
$entry
(@{
$location_conf
->{LOCATION}}) {
if
(
$gw_path
=~ /
$entry
->{PATTERN}/) {
return
@{
$entry
->{SITE}};
}
}
return
;
}
=item $gw->send_request( [$r] )
Send the Apache request to the upstream server. Optionally sets it
first.
=cut
sub
send_request($;$) {
my
$self
=
shift
;
if
(
@_
) {
$self
->{REQUEST} =
shift
}
$self
->_init_request or
return
;
$self
->try_sites(0,
$self
->site_list);
return
1;
# succeeded
}
sub
handler {
if
(!
defined
$gw
) {
$gw
= new Apache::Gateway;
( run in 0.332 second using v1.01-cache-2.11-cpan-55f5a4728d2 )