Apache-Gateway
view release on metacpan or search on metacpan
MuxSite ftp://ftp.digital.com/pub/plan/perl/CPAN/
Site ftp://ftp.orst.edu/pub/packages/CPAN/
Site ftp://ftp.funet.fi/pub/languages/perl/CPAN/
</LocationMatch>
With the C<Site> and C<MuxSite> directives here, the first request
will be forwarded to ftp.perl.org. If it fails, the request will be
retried with cdrom, digital, orst, and funet, in that order. The
next request for that process will be tried with ftp.perl.org first
again. If it fails, retries then go to digital, cdrom, orst, and
finally funet.
A good general strategy for packages with multiple mirrors might be
to specify one or two nearby sites to try first. Then specify some
multiplexed sites slightly further away in case the nearby sites
fail. Finally, fall back to the primary site if all else fails.
=item ClockBroken server-URL upstream^2-TZ upstream-TZ
When caching is employed and requests can be gatewayed to multiple
mirrors, timestamp correctness becomes more important. Unfortunately,
timestamps on mirrored files are usually wrong. For example, the
popular Perl B<mirror> program is generally configured to match
timestamps using the local timezone both locally and on the server it
is mirroring. This strategy is only guaranteed to work if both
servers are in the same timezone.
Example:
ClockBroken ftp://ftp.cdrom.com EET PST8PDT
cdrom gets files from funet, which seems always to use the EET
timezone (which is two hours off from GMT) for purposes of mirroring.
cdrom, however, uses the PST8PDT timezone, so that 00:00 on funet
differs from 00:00 on cdrom by 9 or 10 hours, depending upon whether
or not Daylight Savings Time is in effect. The example ClockBroken
line corrects for this disparity.
Note: timezones are those understood by Time::Zone.
=back
=head1 FUNCTIONS
The following internal functions are documented (mostly useful for
hackers):
=over 4
=cut
use strict;
use vars qw(@ISA);
use Exporter ();
@ISA = qw(Exporter);
$Apache::Gateway::VERSION = sprintf("%d.%02d", q$Revision: 1.12 $ =~ /(\d+)\.(\d+)/g);
use Apache::Constants ':server'; # for SERVER_VERSION for Via comment
use Apache::URI ();
use HTTP::Date ();
use HTTP::Request ();
use HTTP::Status ();
use IO::File ();
use LWP::UserAgent ();
use Time::Zone ();
# In an Apache::Registry script, we would need to make the following
# variables global. However, making them global seems unnecesary in a
# handler.
my %default_port = (finger => 79,
ftp => 21,
gopher => 70,
http => 80,
https => 443,
nntp => 119,
prospero => 1525,
rlogin => 513,
snews => 563,
telnet => 23,
wais => 210,
webster => 765,
whois => 43,
);
my $gw;
=item $gw = Apache::Gateway->new( [$ua] )
Construct a new Apache::Gateway object describing a gateway. If a
LWP::UserAgent is not provided, a new one will be created. Note: the
user agent is modified for seach request; it is not constant and is
probably not shareable.
=cut
sub new($;$) {
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {};
$self->{UA} = @_ ? shift : new LWP::UserAgent,
$self->{CONFIG} = {};
bless($self, $class);
return $self;
}
=item $gw->user_agent( [$ua] )
Get/set the user agent.
=cut
sub user_agent($;$) {
my $self = shift;
if (@_) { $self->{UA} = shift }
return $self->{UA};
}
=item $gw->request( [$r] )
# request.
$r->status($1) if $response->status_line =~ /^(\d+)/;
$r->status_line($response->status_line);
# Attempt to abort on failure.
return if $allow_abort && $response->is_error;
# $r->log_error('Gateway: ' . $response->request->url
# . ' ' . $response->status_line);
# configuration info for this directory
my $loc_conf = $self->location_config;
# Try to modify Content-Base to refer to our multiplexer.
if(my $base = $response->header('Content-Base')) {
# where site appears on gateway, e.g., <http://www.perl.com/CPAN/>.
my $gw_site = server_name($r) . $loc_conf->{ROOT};
$response->header(Content_Base => $base)
if $base =~ s/^$site/$gw_site/;
}
# If necessary, try to compensate for servers with broken clocks.
if(my $lm = $response->last_modified) {
my $upstream_server = server_name_from_URL($r,
$response->request->url->as_string);
if(exists $loc_conf->{BROKEN_CLOCK}{$upstream_server}) {
my $TZ = $loc_conf->{BROKEN_CLOCK}{$upstream_server};
$response->last_modified($lm + diff_TZ($$TZ[1], $$TZ[0]));
}
}
$self->update_via_header_field($response);
copy_header_to_Apache_request($r, $response);
$r->send_http_header;
}
=item redirect($allow_abort);
Try a redirect. We do this via C<LWP::UserAgent> because
C<internal_redirect_handler> does not provide hooks for detecting and
recovering from errors.
=cut
sub redirect($$) {
my ($self, $allow_abort) = @_;
my $r = $self->{REQUEST};
my $ua = $self->{UA};
my $site = $self->{SITE};
my $path = $self->{GW_PATH};
my $url = Apache::URI->parse($r, $site . $path);
# If this is an anon-FTP request, fill in the password with the
# UA's from field.
if($url->scheme eq 'ftp' && $url->user eq 'anonymous') {
$url->password($ua->from) # anon-FTP passwd
}
my $request = HTTP::Request->new($r->method, $url->unparse);
# If upstream server has a broken clock, calculate how much we
# need to adjust condition GET time fields. Note: this code won't
# work correctly if we get redirected to another server with a
# different clock. Oh, well.
my $loc_conf = $self->location_config;
my $upstream_server = server_name_from_URL($r, $url);
my $broken_clock = 0;
if(exists $loc_conf->{BROKEN_CLOCK}{$upstream_server}) {
my $TZ = $loc_conf->{BROKEN_CLOCK}{$upstream_server};
$broken_clock = diff_TZ($$TZ[1], $$TZ[0]);
}
if(my $IMS = $r->header_in('If-Modified-Since')) {
$request->if_modified_since(HTTP::Date::str2time($IMS)
- $broken_clock);
}
if(my $IUmS = $r->header_in('If-Unmodified-Since')) {
$request->if_unmodified_since(HTTP::Date::str2time($IUmS)
- $broken_clock);
}
$request->header(Accept => $r->header_in('Accept'));
# Pragma directives must be passed through.
if(my $pragma = $r->header_in('Pragma')) {
$request->header($pragma);
$request->header('Cache-Control' => 'no-cache') # HTTP/1.1
if $pragma =~ /^no-cache$/i;
}
# Cache directives must be passed through.
if(my $cache = $r->header_in('Cache-Control')) {
$request->header('Cache-Control' => $cache);
}
# We would like the first callback to occur as soon as reasonably
# possible after the headers have been retrieved. Thus, we need a
# small size argument because the first callback may not occur
# until all the headers plus size bytes of the content have been
# retrieved.
my $headers_printed;
my $response = $ua->request($request,
sub {
my($data, $response) = @_;
$self->print_headers($response, $allow_abort)
unless $headers_printed;
$headers_printed = 1;
return if($allow_abort && $response->is_error
|| $r->connection->aborted);
$r->print($data);
},
1024);
# Be sure we've printed the headers. We need this check here
# because callback will never get called for responses with no
# content.
$self->print_headers($response, $allow_abort)
unless $headers_printed || $r->connection->aborted;
( run in 3.423 seconds using v1.01-cache-2.11-cpan-437f7b0c052 )