Apache-Gateway

 view release on metacpan or  search on metacpan

Gateway.pm  view on Meta::CPAN

229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
=item $gw = Apache::Gateway->new( [$ua] )
 
Construct a new Apache::Gateway object describing a gateway.  If a
LWP::UserAgent is not provided, a new one will be created.  Note: the
user agent is modified for seach request; it is not constant and is
probably not shareable.
 
=cut
 
sub new($;$) {
    my $proto = shift;
    my $class = ref($proto) || $proto;
 
    my $self = {};
    $self->{UA} = @_ ? shift : new LWP::UserAgent,
    $self->{CONFIG} = {};
 
    bless($self, $class);
    return $self;
}
 
=item $gw->user_agent( [$ua] )
 
Get/set the user agent.
 
=cut
 
sub user_agent($;$) {
    my $self = shift;
    if (@_) { $self->{UA} = shift }
    return $self->{UA};
}
 
=item $gw->request( [$r] )
 
Get/set the Apache request currently being gatewayed.  To send the
request, see the send_request method.
 
=cut
 
sub request($;$) {
    my $self = shift;
    if (@_) { $self->{REQUEST} = shift }
    return $self->{REQUEST};
}
 
# $gw->_config( [$config] )
 
# Get/set the cached configuration information and current run state.
# This very low-level method is for hackers only.  This API might
# change.
 
sub _config($;$) {
    my $self = shift;
    if (@_) { $self->{CONFIG} = shift }
    return $self->{CONFIG};
}
 
=item $gw->location_config( [$config] )
 
Get/set the configuration information for this gateway location.  Can
be overridden to provide dynamic per location information
 
=cut
 
sub location_config($;$) {
    my $self = shift;
    my $config_file = $self->{REQUEST}->dir_config('GatewayConfig');
    if (@_) { $self->{CONFIG}{$config_file} = shift }
    return $self->{CONFIG}{$config_file};
}
 
# $gw->_init_config_file
#
# If necessary, parse and cache a configuration file specified by the
# GatewayConfig variable.  On error, sets

Gateway.pm  view on Meta::CPAN

317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
#                            ... }
#          ROOT => location of root of gateway,
#          TIMEOUT => timeout in seconds for contacting upstream server
#       }
#  site = a site URL, e.g., http://www.perl.com/CPAN/
#  mux sites list = { START_INDEX => start index of round robin,
#                     SITE => [ site, site, ... ] }
 
# This structure is subject to change.  Because it contains state
# information, it is per object and cannot be shared.
sub _init_config_file($) {
    my $self = shift;
    my $r = $self->{REQUEST};
    my $config = $self->{CONFIG};
    my $config_file = $r->dir_config('GatewayConfig');
    unless ($config_file) {
        $r->log_error('no GatewayConfig');
        $r->status(HTTP::Status::RC_INTERNAL_SERVER_ERROR);
        return;
    }

Gateway.pm  view on Meta::CPAN

425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
    return 1;
}
 
=item clear_headers_for_redirect($r)
 
Clear request headers in $r in preparation for a redirect.
 
=cut
 
sub clear_headers_for_redirect($) {
    my $r = shift;
    # Some of this should be done with Apache::Tie when it is working.
    $r->header_out('Content-Length' => undef); # should use tie
    $r->status(HTTP::Status::RC_OK);
 
    my %err = $r->err_headers_out; # should use tie
    foreach (keys %err) {
        $r->err_header_out($_ => undef);
    }
}
 
=item canonicalized_server_URL($scheme, $hostname, $port)
 
Return semicanonicalized server URL (without trailing slash).
 
=cut
 
sub canonicalized_server_URL($$$) {
    my($scheme, $host, $port) = @_;
    my $server = lc($scheme . '://' . $host);
    if(defined $port and exists $default_port{$scheme}
       and $port != $default_port{$scheme}) {
        $server .= ':' . $port;
    }
    return $server;
}
 
=item server_name_from_URL($r, $url)
 
Return the (somewhat canonicalized) "server name" portion of the URL.
The "server name" is defined as the leading scheme://authority portion
of the URL.
 
=cut
 
sub server_name_from_URL($$) {
    my ($r, $url) = @_;
    $url = Apache::URI->parse($r, $url) unless ref $url;
    return canonicalized_server_URL($url->scheme, $url->hostname, $url->port);
}
 
=item server_name($r)
 
Return the (somewhat canonicalized) "server name" portion of the
URL of this server.  The "server name" is defined as the leading
scheme://authority portion of the URL.  Currently assumes server
access is via HTTP.
 
=cut
 
sub server_name($) {
    my $r = shift;
    return canonicalized_server_URL('http', $r->server->server_hostname,
                                    $r->server->port);
}
 
=item diff_TZ($origin_TZ, $mirror_TZ)
 
Get the usual time difference (in seconds) between the two time zones.
Will yield the wrong results in the midst of a change to/from daylight
savings time.  Specifically, as used in this module, this function
will return the wrong results when applied to files retrieved by the
mirror during the two hours of the year when one server is in Daylight
Savings Time and the other is not.
 
=cut
 
sub diff_TZ($$) {
    my($mirror_TZ, $origin_TZ) = @_;
 
    return 0 if $origin_TZ eq $mirror_TZ; # no need to do anything
 
    # Use Thu Jan 01 00:00:00 GMT 1998 as a reference time.  No
    # changes to/from Daylight Savings Time occurred near this time.
    my $reference_time = 883612800;
 
    return Time::Zone::tz_offset(Time::Zone::tz2zone($mirror_TZ),
                                 $reference_time)

Gateway.pm  view on Meta::CPAN

520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
Update Via header in HTTP::Response with information about this hop.
Hop information combines protocol information from the message with
server information from the B<Apache> server.  The server name
returned is hardcoded as 'C<apache>'.
 
Eventually, options should be provided to control hostname suppression
and comment customization.
 
=cut
 
sub update_via_header_field($$) {
    my($self, $response) = @_;
    my $r = $self->{REQUEST};
 
    # Set protocol.
    my $hop = $response->protocol;
 
    # Oops.  No protocol.  Try to guess from request.
    unless(defined $hop) {
        $hop = (uc(Apache::URI->parse($r, $response->request->url)->scheme)
                . '/unknown');

Gateway.pm  view on Meta::CPAN

564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
Copy the headers from an C<HTTP::Headers> object to an
C<Apache::Request>.  Hope that the B<Apache> request object will later
print out the headers in "Good Practice" order (there appears to be no
way of controlling this).
 
The only tricky item is the Content-Type header, which needs special
handling.
 
=cut
 
sub copy_header_to_Apache_request($$) {
    my($r, $header) = @_;
 
    # Apache might already know the proper content type, e.g., by use
    # of a ForceType directive.  If so, try not to override it.  Else,
    # the type needs to be set explicitly with the Apache request's
    # content_type method: simply setting the header value isn't
    # enough.
    if(defined $r->content_type) {
        $header->content_type(undef);
    }
    else {
        $r->content_type($header->content_type);
    }
 
    # Copy headers to Apache request (in "Good Practice" order).
    $header->scan(sub {$r->header_out(@_);});
}
 
sub print_headers($$$) {
    my ($self, $response, $allow_abort) = @_;
    my $r = $self->{REQUEST};
    my $site = $self->{SITE};
    my $path = $self->{GW_PATH};
 
    # Copy status code and reason phrase from response to Apache
    # request.
    $r->status($1) if $response->status_line =~ /^(\d+)/;
    $r->status_line($response->status_line);

Gateway.pm  view on Meta::CPAN

634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
}
 
=item redirect($allow_abort);
 
Try a redirect.  We do this via C<LWP::UserAgent> because
C<internal_redirect_handler> does not provide hooks for detecting and
recovering from errors.
 
=cut
 
sub redirect($$) {
    my ($self, $allow_abort) = @_;
 
    my $r = $self->{REQUEST};
    my $ua = $self->{UA};
    my $site = $self->{SITE};
    my $path = $self->{GW_PATH};
 
    my $url = Apache::URI->parse($r, $site . $path);
 
    # If this is an anon-FTP request, fill in the password with the

Gateway.pm  view on Meta::CPAN

720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
      unless $headers_printed || $r->connection->aborted;
}
 
=item $gw->site( [$site] )
 
Get/set the site tried.  Can be used to determine which upstream
server actually fields a request.
 
=cut
 
sub site($;$) {
    my $self = shift;
    if (@_) { $self->{SITE} = shift }
    return $self->{SITE};
}
 
=item $gw->try_URI($allow_abort)
 
Try the site $gw->site.  Ideally, we could use
C<Apache::internal_redirect_handler> to try the redirects.  However,
it provides no hook for detecting an error and aborting output.
That's not B<mod_perl>'s fault--B<Apache> source would need to be
modified to support such a hook.
 
=cut
 
sub try_URI($$) {
    my ($self, $allow_abort) = @_;
    clear_headers_for_redirect($self->{REQUEST});
    $self->redirect($allow_abort);
}
 
=item try_sites($allow_last_site_abort, @site)
 
Try sites in order until one succeeds.  $allow_last_site_abort
indicates if the last site can/should be aborted after examing the
head for its error code.  All other sites always allow premature
abortion.
 
Abortion is needed because only one request can be allowed to run to
completion and produce a message body.
 
=cut
 
sub try_sites($$@) {
    my ($self, $allow_last_site_abort, @site) = @_;
 
    my $r = $self->{REQUEST};
 
    # Try all but last site, aborting each attempt on error.
    for(my $i = 0; $i <= $#site; ++$i) {
        if(ref $site[$i]) {
            # Try this group of sites, starting at index $idx.
            my $mux_site = $site[$i];
            my $idx = $mux_site->{START_INDEX};

Gateway.pm  view on Meta::CPAN

786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
        }
 
        # We can exit if the last attempt succeeded or if the client
        # is no longer talking to us.
        return if(!HTTP::Status::is_error($r->status)
                  || $r->connection->aborted);
    }
}
 
# Set up the user agent for this particular request.
sub _init_ua($) {
    my $self = shift;
    my $r = $self->{REQUEST};
    my $ua = $self->{UA};
    $ua->from($r->server->server_admin);
    $ua->agent($r->header_in('User-Agent'));
    $ua->timeout($self->location_config->{TIMEOUT});
    return 1;                   # succeeded
}
 
# Set $self->{GW_PATH} to the portion of the path relative to
# GatewayRoot.  This is also the path which is appended to the URIs of
# the upstream servers.
sub _init_path($) {
    my $self = shift;
    my $r = $self->{REQUEST};
 
    # epath = $gw_root . $gw_path
    my $gw_root = $self->location_config->{ROOT};
    my ($gw_path) = $r->parsed_uri->path =~ /^\Q$gw_root\E(.*)/;
 
    unless(defined $gw_path) {  # error
        $r->log_error($r->uri . ' does not begin with ' . $gw_root);
        $r->status(HTTP::Status::RC_INTERNAL_SERVER_ERROR);
        return;
    }
 
    $self->{GW_PATH} = $gw_path; # succeeded
    return 1;
}
 
sub _init_request($) {
    my $self = shift;
    $self->_init_config_file or return;
    $self->_init_ua          or return;
    $self->_init_path                or return;
    return 1;                   # succeeded
}
 
=item $gw->site_list
 
Get the list of sites to try for this request.  Can be overridden to
customize the list of sites to try.
 
By default, this method looks through the LocationMatch sections in
the GatewayConfig file in order and returns the sites in the first
section matched.
 
=cut
 
sub site_list($) {
    my $self = shift;
    my $location_conf = $self->location_config;
    my $gw_path = $self->{GW_PATH};
    foreach my $entry (@{$location_conf->{LOCATION}}) {
        if($gw_path =~ /$entry->{PATTERN}/) {
            return @{$entry->{SITE}};
        }
    }
    return;
}
 
=item $gw->send_request( [$r] )
 
Send the Apache request to the upstream server.  Optionally sets it
first.
 
=cut
 
sub send_request($;$) {
    my $self = shift;
    if (@_) { $self->{REQUEST} = shift }
    $self->_init_request or return;
    $self->try_sites(0, $self->site_list);
    return 1;                   # succeeded
}
 
sub handler {
    if(! defined $gw) {
        $gw = new Apache::Gateway;



( run in 0.332 second using v1.01-cache-2.11-cpan-55f5a4728d2 )