HTTP-Proxy
view release on metacpan or search on metacpan
lib/HTTP/Proxy.pm view on Meta::CPAN
# constants used for logging
use constant ERROR => -1; # always log
use constant NONE => 0; # never log
use constant PROXY => 1; # proxy information
use constant STATUS => 2; # HTTP status
use constant PROCESS => 4; # sub-process life (and death)
use constant SOCKET => 8; # low-level connections
use constant HEADERS => 16; # HTTP headers
use constant FILTERS => 32; # Messages from filters
use constant DATA => 64; # Data received by the filters
use constant CONNECT => 128; # Data transmitted by the CONNECT method
use constant ENGINE => 256; # Internal information from the Engine
use constant ALL => 511; # All of the above
# modules that need those constants to be defined
use HTTP::Proxy::Engine;
use HTTP::Proxy::FilterStack;
# Methods we can forward
my %METHODS;
# HTTP (RFC 2616)
$METHODS{http} = [qw( CONNECT DELETE GET HEAD OPTIONS POST PUT TRACE )];
# WebDAV (RFC 2518)
$METHODS{webdav} = [
@{ $METHODS{http} },
qw( COPY LOCK MKCOL MOVE PROPFIND PROPPATCH UNLOCK )
];
# Delta-V (RFC 3253)
$METHODS{deltav} = [
@{ $METHODS{webdav} },
qw( BASELINE-CONTROL CHECKIN CHECKOUT LABEL MERGE MKACTIVITY
MKWORKSPACE REPORT UNCHECKOUT UPDATE VERSION-CONTROL ),
];
# the whole method list
@METHODS = HTTP::Proxy->known_methods();
# useful regexes (from RFC 2616 BNF grammar)
my %RX;
$RX{token} = qr/[-!#\$%&'*+.0-9A-Z^_`a-z|~]+/;
$RX{mime} = qr($RX{token}/$RX{token});
$RX{method} = '(?:' . join ( '|', @METHODS ) . ')';
$RX{method} = qr/$RX{method}/;
sub new {
my $class = shift;
my %params = @_;
# some defaults
my %defaults = (
agent => undef,
chunk => 4096,
daemon => undef,
host => 'localhost',
logfh => *STDERR,
logmask => NONE,
max_connections => 0,
max_keep_alive_requests => 10,
port => 8080,
stash => {},
timeout => 60,
via => undef,
x_forwarded_for => 1,
);
# non modifiable defaults
my $self = bless { conn => 0, loop => 1 }, $class;
# support for deprecated stuff
{
my %convert = (
maxchild => 'max_clients',
maxconn => 'max_connections',
maxserve => 'max_keep_alive_requests',
);
while( my ($old, $new) = each %convert ) {
if( exists $params{$old} ) {
$params{$new} = delete $params{$old};
carp "$old is deprecated, please use $new";
}
}
}
# get attributes
$self->{$_} = exists $params{$_} ? delete( $params{$_} ) : $defaults{$_}
for keys %defaults;
if (!defined $self->{via}) {
$self->{via} =
hostname()
. ( $self->{port} != 80 ? ":$self->{port}" : '' )
. " (HTTP::Proxy/$VERSION)";
}
# choose an engine with the remaining parameters
$self->{engine} = HTTP::Proxy::Engine->new( %params, proxy => $self );
$self->log( PROXY, "PROXY", "Selected engine " . ref $self->{engine} );
return $self;
}
sub known_methods {
my ( $class, @args ) = @_;
@args = map { lc } @args ? @args : ( keys %METHODS );
exists $METHODS{$_} || carp "Method group $_ doesn't exist"
for @args;
my %seen;
return grep { !$seen{$_}++ } map { @{ $METHODS{$_} || [] } } @args;
}
sub timeout {
my $self = shift;
my $old = $self->{timeout};
if (@_) {
$self->{timeout} = shift;
$self->agent->timeout( $self->{timeout} ) if $self->agent;
}
return $old;
}
sub url {
my $self = shift;
if ( not defined $self->daemon ) {
carp "HTTP daemon not started yet";
return undef;
}
return $self->daemon->url;
}
# normal accessors
for my $attr ( qw(
agent chunk daemon host logfh port request response hop_headers
logmask via x_forwarded_for client_headers engine
max_connections max_keep_alive_requests
)
)
{
no strict 'refs';
*{"HTTP::Proxy::$attr"} = sub {
my $self = shift;
my $old = $self->{$attr};
$self->{$attr} = shift if @_;
return $old;
}
}
# read-only accessors
for my $attr (qw( conn loop client_socket )) {
no strict 'refs';
*{"HTTP::Proxy::$attr"} = sub { $_[0]{$attr} }
}
sub max_clients { shift->engine->max_clients( @_ ) }
# deprecated methods are still supported
{
my %convert = (
maxchild => 'max_clients',
maxconn => 'max_connections',
maxserve => 'max_keep_alive_requests',
);
while ( my ( $old, $new ) = each %convert ) {
no strict 'refs';
*$old = sub {
carp "$old is deprecated, please use $new";
goto \&$new;
};
}
}
sub stash {
my $stash = shift->{stash};
return $stash unless @_;
return $stash->{ $_[0] } if @_ == 1;
return $stash->{ $_[0] } = $_[1];
}
sub new_connection { ++$_[0]{conn} }
sub start {
my $self = shift;
$self->init;
$SIG{INT} = $SIG{TERM} = sub { $self->{loop} = 0 };
# the main loop
my $engine = $self->engine;
$engine->start if $engine->can('start');
while( $self->loop ) {
$engine->run;
last if $self->max_connections && $self->conn >= $self->max_connections;
}
$engine->stop if $engine->can('stop');
$self->log( STATUS, "STATUS",
"Processed " . $self->conn . " connection(s)" );
return $self->conn;
}
# semi-private init method
sub init {
my $self = shift;
# must be run only once
return if $self->{_init}++;
$self->_init_daemon if ( !defined $self->daemon );
$self->_init_agent if ( !defined $self->agent );
# specific agent config
$self->agent->requests_redirectable( [] );
$self->agent->agent(''); # for TRACE support
$self->agent->protocols_allowed( [qw( http https ftp gopher )] );
# standard header filters
$self->{headers}{request} = HTTP::Proxy::FilterStack->new;
$self->{headers}{response} = HTTP::Proxy::FilterStack->new;
# the same standard filter is used to handle headers
my $std = HTTP::Proxy::HeaderFilter::standard->new();
$std->proxy( $self );
$self->{headers}{request}->push( [ sub { 1 }, $std ] );
$self->{headers}{response}->push( [ sub { 1 }, $std ] );
# standard body filters
$self->{body}{request} = HTTP::Proxy::FilterStack->new(1);
$self->{body}{response} = HTTP::Proxy::FilterStack->new(1);
return;
}
#
# private init methods
#
sub _init_daemon {
my $self = shift;
my %args = (
LocalAddr => $self->host,
LocalPort => $self->port,
ReuseAddr => 1,
);
delete $args{LocalPort} unless $self->port; # 0 means autoselect
my $daemon = HTTP::Daemon->new(%args)
or die "Cannot initialize proxy daemon: $!";
$self->daemon($daemon);
return $daemon;
}
sub _init_agent {
my $self = shift;
my $agent = LWP::UserAgent->new(
env_proxy => 1,
keep_alive => 2,
parse_head => 0,
timeout => $self->timeout,
)
or die "Cannot initialize proxy agent: $!";
$self->agent($agent);
return $agent;
}
# This is the internal "loop" that lets the child process process the
# incoming connections.
sub serve_connections {
my ( $self, $conn ) = @_;
my $response;
$self->{client_socket} = $conn; # read-only
$self->log( SOCKET, "SOCKET", "New connection from " . $conn->peerhost
. ":" . $conn->peerport );
my ( $last, $served ) = ( 0, 0 );
while ( $self->loop() ) {
my $req;
{
local $SIG{INT} = local $SIG{TERM} = 'DEFAULT';
$req = $conn->get_request();
}
$served++;
# initialisation
$self->request($req);
$self->response(undef);
# Got a request?
unless ( defined $req ) {
$self->log( SOCKET, "SOCKET",
"Getting request failed: " . $conn->reason )
if $conn->reason ne 'No more requests from this connection';
return;
}
$self->log( STATUS, "REQUEST", $req->method . ' '
. ( $req->method eq 'CONNECT' ? $req->uri->host_port : $req->uri ) );
# can we forward this method?
if ( !grep { $_ eq $req->method } @METHODS ) {
$response = HTTP::Response->new( 501, 'Not Implemented' );
$response->content_type( "text/plain" );
$response->content(
"Method " . $req->method . " is not supported by this proxy." );
$self->response($response);
goto SEND;
}
# transparent proxying support
if( not defined $req->uri->scheme ) {
if( my $host = $req->header('Host') ) {
$req->uri->scheme( 'http' );
$req->uri->host( $host );
}
else {
lib/HTTP/Proxy.pm view on Meta::CPAN
# the callback is not called by LWP::UA->request
# in some cases (HEAD, redirect, error responses have no body)
if ( !$sent ) {
$self->response($response);
$self->{$_}{response}->select_filters( $response )
for qw( headers body );
$self->{headers}{response}
->filter( $response->headers, $response );
}
# do a last pass, in case there was something left in the buffers
my $data = ""; # FIXME $protocol is undef here too
$self->{body}{response}->filter_last( \$data, $response, undef );
if ( length $data ) {
if ($chunked) {
printf $conn "%x$CRLF%s$CRLF", length($data), $data;
}
else { print $conn $data; }
}
# last chunk
print $conn "0$CRLF$CRLF" if $chunked; # no trailers either
$self->response($response);
# what about X-Died and X-Content-Range?
if( my $died = $response->header('X-Died') ) {
$self->log( ERROR, "ERROR", $died );
$sent = 0;
$response = HTTP::Response->new( 500, "Proxy filter error" );
$response->content_type( "text/plain" );
$response->content($died);
$self->response($response);
}
SEND:
$response = $self->response ;
# responses that weren't filtered through callbacks
# (empty body or error)
# FIXME some error response headers might not be filtered
if ( !$sent ) {
($last, $chunked) = $self->_send_response_headers( $served );
my $content = $response->content;
if ($chunked) {
printf $conn "%x$CRLF%s$CRLF", length($content), $content
if length($content); # the filter may leave nothing
print $conn "0$CRLF$CRLF";
}
else { print $conn $content; }
}
# FIXME ftp, gopher
$conn->print( $response->content )
if defined $req->uri->scheme
and $req->uri->scheme =~ /^(?:ftp|gopher)$/
and $response->is_success;
$self->log( SOCKET, "SOCKET", "Connection closed by the proxy" ), last
if $last || $served >= $self->max_keep_alive_requests;
}
$self->log( SOCKET, "SOCKET", "Connection closed by the client" )
if !$last
and $served < $self->max_keep_alive_requests;
$self->log( PROCESS, "PROCESS", "Served $served requests" );
$conn->close;
}
# INTERNAL METHOD
# send the response headers for the proxy
# expects $served (number of requests served)
# returns $last and $chunked (last request served, chunked encoding)
sub _send_response_headers {
my ( $self, $served ) = @_;
my ( $last, $chunked ) = ( 0, 0 );
my $conn = $self->client_socket;
my $response = $self->response;
# correct headers
$response->remove_header("Content-Length")
if $self->{body}{response}->will_modify();
$response->header( Server => "HTTP::Proxy/$VERSION" )
unless $response->header( 'Server' );
$response->header( Date => time2str(time) )
unless $response->header( 'Date' );
# this is adapted from HTTP::Daemon
if ( $conn->antique_client ) { $last++ }
else {
my $code = $response->code;
$conn->send_status_line( $code, $response->message,
$self->request()->protocol() );
if ( $code =~ /^(1\d\d|[23]04)$/ ) {
# make sure content is empty
$response->remove_header("Content-Length");
$response->content('');
}
elsif ( $response->request && $response->request->method eq "HEAD" )
{ # probably OK, says HTTP::Daemon
}
else {
if ( $conn->proto_ge("HTTP/1.1") ) {
$chunked++;
$response->push_header( "Transfer-Encoding" => "chunked" );
$response->push_header( "Connection" => "close" )
if $served >= $self->max_keep_alive_requests;
}
else {
$last++;
$conn->force_last_request;
}
}
print $conn $response->headers_as_string($CRLF);
print $conn $CRLF; # separates headers and content
}
$self->log( STATUS, "RESPONSE", $response->status_line );
$self->log( HEADERS, "RESPONSE", $response->headers->as_string );
return ($last, $chunked);
}
# INTERNAL method
# FIXME no man-in-the-middle for now
sub _handle_CONNECT {
my ($self, $served) = @_;
my $last = 0;
my $conn = $self->client_socket;
my $req = $self->request;
my $upstream;
# connect upstream
if ( my $up = $self->agent->proxy('http') ) {
# clean up authentication info from proxy URL
$up =~ s{^http://[^/\@]*\@}{http://};
# forward to upstream proxy
$self->log( PROXY, "PROXY",
"Forwarding CONNECT request to next proxy: $up" );
my $response = $self->agent->simple_request($req);
# check the upstream proxy's response
my $code = $response->code;
if ( $code == 407 ) { # don't forward Proxy Authentication requests
my $response_407 = $response->as_string;
$response_407 =~ s/^Client-.*$//mg;
$response = HTTP::Response->new(502);
$response->content_type("text/plain");
$response->content( "Upstream proxy ($up) "
. "requested authentication:\n\n"
. $response_407 );
$self->response($response);
return $last;
}
elsif ( $code != 200 ) { # forward every other failure
$self->response($response);
return $last;
}
$upstream = $response->{client_socket};
}
else { # direct connection
$upstream = IO::Socket::INET->new( PeerAddr => $req->uri->host_port );
}
# no upstream socket obtained
lib/HTTP/Proxy.pm view on Meta::CPAN
=item logfh
A filehandle to a logfile (default: C<*STDERR>).
=item logmask( [$mask] )
Be verbose in the logs (default: C<NONE>).
Here are the various elements that can be added to the mask (their values
are powers of 2, starting from 0 and listed here in ascending order):
NONE - Log only errors
PROXY - Proxy information
STATUS - Requested URL, response status and total number
of connections processed
PROCESS - Subprocesses information (fork, wait, etc.)
SOCKET - Information about low-level sockets
HEADERS - Full request and response headers are sent along
FILTERS - Filter information
DATA - Data received by the filters
CONNECT - Data transmitted by the CONNECT method
ENGINE - Engine information
ALL - Log all of the above
If you only want status and process information, you can use:
$proxy->logmask( STATUS | PROCESS );
Note that all the logging constants are not exported by default, but
by the C<:log> tag. They can also be exported one by one.
=item loop (read-only)
Internal. False when the main loop is about to be broken.
=item max_clients
=item maxchild
The maximum number of child process the L<HTTP::Proxy> object will spawn
to handle client requests (default: depends on the engine).
This method is currently delegated to the L<HTTP::Proxy::Engine> object.
C<maxchild> is deprecated and will disappear.
=item max_connections
=item maxconn
The maximum number of TCP connections the proxy will accept before
returning from start(). 0 (the default) means never stop accepting
connections.
C<maxconn> is deprecated.
Note: C<max_connections> will be deprecated soon, for two reasons: 1)
it is more of an L<HTTP::Proxy::Engine> attribute, 2) not all engines will
support it.
=item max_keep_alive_requests
=item maxserve
The maximum number of requests the proxy will serve in a single connection.
(same as C<MaxRequestsPerChild> in Apache)
C<maxserve> is deprecated.
=item port
The proxy L<HTTP::Daemon> port (default: 8080).
=item request
The request originally received by the proxy from the user-agent, which
will be modified by the request filters.
=item response
The response received from the origin server by the proxy. It is
normally C<undef> until the proxy actually receives the beginning
of a response from the origin server.
If one of the request filters sets this attribute, it "short-circuits"
the request/response scheme, and the proxy will return this response
(which is NOT filtered through the response filter stacks) instead of
the expected origin server response. This is useful for caching (though
Squid does it much better) and proxy authentication, for example.
=item stash
The stash is a hash where filters can store data to share between them.
The stash() method can be used to set the whole hash (with a HASH reference).
To access individual keys simply do:
$proxy->stash( 'bloop' );
To set it, type:
$proxy->stash( bloop => 'owww' );
It's also possibly to get a reference to the stash:
my $s = $filter->proxy->stash();
$s->{bang} = 'bam';
# $proxy->stash( 'bang' ) will now return 'bam'
B<Warning:> since the proxy forks for each TCP connection, the data is
only shared between filters in the same child process.
=item timeout
The timeout used by the internal L<LWP::UserAgent> (default: 60).
=item url (read-only)
The url where the proxy can be reached.
lib/HTTP/Proxy.pm view on Meta::CPAN
each line starting with C<$prefix>.
=item is_protocol_supported( $scheme )
Returns a boolean indicating if $scheme is supported by the proxy.
This method is only used internally.
It is essential to allow L<HTTP::Proxy> users to create "pseudo-schemes"
that LWP doesn't know about, but that one of the proxy filters can handle
directly. New schemes are added as follows:
$proxy->init(); # required to get an agent
$proxy->agent->protocols_allowed(
[ @{ $proxy->agent->protocols_allowed }, 'myhttp' ] );
=item new_connection()
Increase the proxy's TCP connections counter. Only used by
L<HTTP::Proxy::Engine> objects.
=back
=head2 Apache-like attributes
L<HTTP::Proxy> has several Apache-like attributes that control the
way the HTTP and TCP connections are handled.
The following attributes control the TCP connection. They are passed to
the underlying L<HTTP::Proxy::Engine>, which may (or may not) use them
to change its behaviour.
=over 4
=item start_servers
Number of child process to fork at the beginning.
=item max_clients
Maximum number of concurrent TCP connections (i.e. child processes).
=item max_requests_per_child
Maximum number of TCP connections handled by the same child process.
=item min_spare_servers
Minimum number of inactive child processes.
=item max_spare_servers
Maximum number of inactive child processes.
=back
Those attributes control the HTTP connection:
=over 4
=item keep_alive
Support for keep alive HTTP connections.
=item max_keep_alive_requests
Maximum number of HTTP connections within a single TCP connection.
=item keep_alive_timeout
Timeout for keep-alive connection.
=back
=head1 EXPORTED SYMBOLS
No symbols are exported by default. The C<:log> tag exports all the
logging constants.
=head1 BUGS
This module does not work under Windows, but I can't see why, and do not
have a development platform under that system. Patches and explanations
very welcome.
I guess it is because C<fork()> is not well supported.
$proxy->maxchild(0);
=over 4
=item However, David Fishburn says:
This did not work for me under WinXP - ActiveState Perl 5.6, but it
DOES work on WinXP ActiveState Perl 5.8.
=back
Several people have tried to help, but we haven't found a way to make it work
correctly yet.
As from version 0.16, the default engine is L<HTTP::Proxy::Engine::NoFork>.
Let me know if it works better.
=head1 SEE ALSO
L<HTTP::Proxy::Engine>, L<HTTP::Proxy::BodyFilter>,
L<HTTP::Proxy::HeaderFilter>, the examples in F<eg/>.
=head1 AUTHOR
Philippe "BooK" Bruhat, E<lt>book@cpan.orgE<gt>.
There is also a mailing-list: http-proxy@mongueurs.net for general
discussion about L<HTTP::Proxy>.
=head1 THANKS
Many people helped me during the development of this module, either on
mailing-lists, IRC or over a beer in a pub...
So, in no particular order, thanks to the libwww-perl team for such a
terrific suite of modules, perl-qa (tips for testing), the French Perl
I<Mongueurs> (for code tricks, beers and encouragements) and my growing
user base... C<;-)>
I'd like to particularly thank Dan Grigsby, who's been using
L<HTTP::Proxy> since 2003 (before the filter classes even existed). He is
apparently making a living from a product based on L<HTTP::Proxy>. Thanks
a lot for your confidence in my work!
( run in 0.969 second using v1.01-cache-2.11-cpan-71847e10f99 )