Furl
view release on metacpan or search on metacpan
lib/Furl/HTTP.pm view on Meta::CPAN
# return response.
if (ref $res_content) {
$res_content = $res_content->get_response_string;
}
return (
$res_minor_version, $res_status, $res_msg, $res_headers, $res_content,
$req_headers, $req_content, undef, undef, [$scheme, $username, $password, $host, $port, $path_query],
);
}
# connects to $host:$port and returns $socket
sub connect :method {
my($self, $host, $port, $timeout_at) = @_;
my $sock;
my $timeout = $timeout_at - time;
return (undef, "Failed to resolve host name: timeout")
if $timeout <= 0;
my ($sock_addr, $err_reason) = $self->_get_address($host, $port, $timeout);
return (undef, "Cannot resolve host name: $host (port: $port), " . ($err_reason || $!))
unless $sock_addr;
RETRY:
socket($sock, Socket::sockaddr_family($sock_addr), SOCK_STREAM, 0)
or Carp::croak("Cannot create socket: $!");
_set_sockopts($sock);
if (connect($sock, $sock_addr)) {
# connected
} elsif ($! == EINPROGRESS || (WIN32 && $! == EWOULDBLOCK)) {
$self->do_select(1, $sock, $timeout_at)
or return (undef, "Cannot connect to ${host}:${port}: timeout");
# connected
} else {
if ($! == EINTR && ! $self->{stop_if}->()) {
close $sock;
goto RETRY;
}
return (undef, "Cannot connect to ${host}:${port}: $!");
}
$sock;
}
sub _get_address {
my ($self, $host, $port, $timeout) = @_;
if ($self->{get_address}) {
return $self->{get_address}->($host, $port, $timeout);
}
# default rule (TODO add support for IPv6)
my $iaddr = $self->{inet_aton}->($host, $timeout)
or return (undef, $!);
pack_sockaddr_in($port, $iaddr);
}
sub _ssl_opts {
my $self = shift;
my $ssl_opts = $self->{ssl_opts};
unless (exists $ssl_opts->{SSL_verify_mode}) {
# set SSL_VERIFY_PEER as default.
$ssl_opts->{SSL_verify_mode} = IO::Socket::SSL::SSL_VERIFY_PEER();
unless (exists $ssl_opts->{SSL_verifycn_scheme}) {
$ssl_opts->{SSL_verifycn_scheme} = 'www'
}
}
if ($ssl_opts->{SSL_verify_mode}) {
unless (exists $ssl_opts->{SSL_ca_file} || exists $ssl_opts->{SSL_ca_path}) {
require Mozilla::CA;
$ssl_opts->{SSL_ca_file} = Mozilla::CA::SSL_ca_file();
}
}
$ssl_opts;
}
# connect SSL socket.
# You can override this method in your child class, if you want to use Crypt::SSLeay or some other library.
# @return file handle like object
sub connect_ssl {
my ($self, $host, $port, $timeout_at) = @_;
_requires('IO/Socket/SSL.pm', 'SSL');
my ($sock, $err_reason) = $self->connect($host, $port, $timeout_at);
return (undef, $err_reason)
unless $sock;
my $timeout = $timeout_at - time;
return (undef, "Cannot create SSL connection: timeout")
if $timeout <= 0;
my $ssl_opts = $self->_ssl_opts;
IO::Socket::SSL->start_SSL(
$sock,
PeerHost => $host,
PeerPort => $port,
Timeout => $timeout,
%$ssl_opts,
) or return (undef, "Cannot create SSL connection: " . IO::Socket::SSL::errstr());
_set_sockopts($sock);
$sock;
}
sub connect_ssl_over_proxy {
my ($self, $proxy_host, $proxy_port, $host, $port, $timeout_at, $proxy_authorization) = @_;
_requires('IO/Socket/SSL.pm', 'SSL');
my $sock = $self->connect($proxy_host, $proxy_port, $timeout_at);
my $p = "CONNECT $host:$port HTTP/1.0\015\012Server: $host\015\012";
if (defined $proxy_authorization) {
$p .= "Proxy-Authorization: $proxy_authorization\015\012";
}
$p .= "\015\012";
$self->write_all($sock, $p, $timeout_at)
or return $self->_r500(
"Failed to send HTTP request to proxy: " . _strerror_or_timeout());
my $buf = '';
my $read = $self->read_timeout($sock,
\$buf, $self->{bufsize}, length($buf), $timeout_at);
if (not defined $read) {
return (undef, "Cannot read proxy response: " . _strerror_or_timeout());
} elsif ( $read == 0 ) { # eof
return (undef, "Unexpected EOF while reading proxy response");
} elsif ( $buf !~ /^HTTP\/1\.[0-9] 200 .+\015\012/ ) {
return (undef, "Invalid HTTP Response via proxy");
}
my $timeout = $timeout_at - time;
return (undef, "Cannot start SSL connection: timeout")
if $timeout_at <= 0;
my $ssl_opts = $self->_ssl_opts;
unless (exists $ssl_opts->{SSL_verifycn_name}) {
$ssl_opts->{SSL_verifycn_name} = $host;
}
IO::Socket::SSL->start_SSL(
$sock,
PeerHost => $host,
PeerPort => $port,
Timeout => $timeout,
%$ssl_opts
) or return (undef, "Cannot start SSL connection: " . IO::Socket::SSL::errstr());
_set_sockopts($sock); # just in case (20101118 kazuho)
$sock;
}
sub _read_body_chunked {
my ($self, $sock, $res_content, $rest_header, $timeout_at) = @_;
my $buf = $rest_header;
READ_LOOP: while (1) {
if (
my ( $header, $next_len ) = (
$buf =~
m{\A ( # header
( [0-9a-fA-F]+ ) # next_len (hex number)
(?:;
$HTTP_TOKEN
=
(?: $HTTP_TOKEN | $HTTP_QUOTED_STRING )
)* # optional chunk-extensions
[ ]* # www.yahoo.com adds spaces here.
# Is this valid?
\015\012 # CR+LF
) }xmso
)
)
{
$buf = substr($buf, length($header)); # remove header from buf
$next_len = hex($next_len);
if ($next_len == 0) {
last READ_LOOP;
}
# +2 means trailing CRLF
READ_CHUNK: while ( $next_len+2 > length($buf) ) {
my $n = $self->read_timeout( $sock,
\$buf, $self->{bufsize}, length($buf), $timeout_at );
if (!$n) {
return $self->_r500(
!defined($n)
? "Cannot read chunk: " . _strerror_or_timeout()
: "Unexpected EOF while reading packets"
);
}
}
$$res_content .= substr($buf, 0, $next_len);
$buf = substr($buf, $next_len+2);
if (length($buf) > 0) {
next; # re-parse header
}
}
my $n = $self->read_timeout( $sock,
\$buf, $self->{bufsize}, length($buf), $timeout_at );
if (!$n) {
return $self->_r500(
!defined($n)
? "Cannot read chunk: " . _strerror_or_timeout()
: "Unexpected EOF while reading packets"
);
}
lib/Furl/HTTP.pm view on Meta::CPAN
# on EINTER/EAGAIN/EWOULDBLOCK
$self->do_select(0, $sock, $timeout_at) or return undef;
}
}
# returns (positive) number of bytes written, or undef if the socket is to be closed
sub write_timeout {
my ($self, $sock, $buf, $len, $off, $timeout_at) = @_;
my $ret;
while(1) {
# try to do the IO
defined($ret = syswrite($sock, $buf, $len, $off))
and return $ret;
if ($! == EAGAIN || $! == EWOULDBLOCK || (WIN32 && $! == EISCONN)) {
# passthru
} elsif ($! == EINTR) {
return undef if $self->{stop_if}->();
# otherwise passthru
} else {
return undef;
}
$self->do_select(1, $sock, $timeout_at) or return undef;
}
}
# writes all data in buf and returns number of bytes written or undef if failed
sub write_all {
my ($self, $sock, $buf, $timeout_at) = @_;
my $off = 0;
while (my $len = length($buf) - $off) {
my $ret = $self->write_timeout($sock, $buf, $len, $off, $timeout_at)
or return undef;
$off += $ret;
}
return $off;
}
sub _r500 {
my($self, $message) = @_;
$message = Carp::shortmess($message); # add lineno and filename
return(0, 500, "Internal Response: $message",
[
'Content-Length' => length($message),
'X-Internal-Response' => 1,
# XXX ^^ EXPERIMENTAL header. Do not depend to this.
], $message
);
}
sub _strerror_or_timeout {
$! != 0 ? "$!" : 'timeout';
}
sub _set_sockopts {
my $sock = shift;
setsockopt( $sock, IPPROTO_TCP, TCP_NODELAY, 1 )
or Carp::croak("Failed to setsockopt(TCP_NODELAY): $!");
if (WIN32) {
if (ref($sock) ne 'IO::Socket::SSL') {
my $tmp = 1;
ioctl( $sock, 0x8004667E, \$tmp )
or Carp::croak("Cannot set flags for the socket: $!");
}
} else {
my $flags = fcntl( $sock, F_GETFL, 0 )
or Carp::croak("Cannot get flags for the socket: $!");
$flags = fcntl( $sock, F_SETFL, $flags | O_NONBLOCK )
or Carp::croak("Cannot set flags for the socket: $!");
}
{
# no buffering
my $orig = select();
select($sock); $|=1;
select($orig);
}
binmode $sock;
}
# You can override this method if you want to use more powerful matcher.
sub match_no_proxy {
my ( $self, $no_proxy, $host ) = @_;
# ref. curl.1.
# list of host names that shouldn't go through any proxy.
# If set to a asterisk '*' only, it matches all hosts.
if ( $no_proxy eq '*' ) {
return 1;
}
else {
for my $pat ( split /\s*,\s*/, lc $no_proxy ) {
if ( $host =~ /\Q$pat\E$/ ) { # suffix match(same behavior with LWP)
return 1;
}
}
}
return 0;
}
# utility class
{
package # hide from pause
Furl::FileStream;
use overload '.=' => 'append', fallback => 1;
sub new {
my ($class, $fh) = @_;
bless {fh => $fh}, $class;
}
sub append {
my($self, $partial) = @_;
print {$self->{fh}} $partial;
return $self;
}
sub get_response_string { undef }
}
{
package # hide from pause
lib/Furl/HTTP.pm view on Meta::CPAN
=head2 Class Methods
=head3 C<< Furl::HTTP->new(%args | \%args) :Furl >>
Creates and returns a new Furl client with I<%args>. Dies on errors.
I<%args> might be:
=over
=item agent :Str = "Furl/$VERSION"
=item timeout :Int = 10
Seconds until the call to $furl->request returns a timeout error (as an internally generated 500 error). The timeout might not be accurate since some underlying modules / built-ins function may block longer than the specified timeout. See the L</"FAQ...
=item inactivity_timeout :Int = 600
An inactivity timer for TCP read/write (in seconds). $furl->request returns a timeout error if no additional data arrives (or is sent) within the specified threshold.
=item max_redirects :Int = 7
=item proxy :Str
=item no_proxy :Str
=item headers :ArrayRef
=item header_format :Int = HEADERS_AS_ARRAYREF
This option choose return value format of C<< $furl->request >>.
This option allows HEADERS_NONE or HEADERS_AS_ARRAYREF.
B<HEADERS_AS_ARRAYREF> is a default value. This makes B<$headers> as ArrayRef.
B<HEADERS_NONE> makes B<$headers> as undef. Furl does not return parsing result of headers. You should take needed headers from B<special_headers>.
=item connection_pool :Object
This is the connection pool object for keep-alive requests. By default, it is a instance of L<Furl::ConnectionCache>.
You may not customize this variable otherwise to use L<Coro>. This attribute requires a duck type object. It has two methods, C<< $obj->steal($host, $port >> and C<< $obj->push($host, $port, $sock) >>.
=item stop_if :CodeRef
A callback function that is called by Furl after when a blocking function call returns EINTR. Furl will abort the HTTP request and return immediately if the callback returns true. Otherwise the operation is continued (the default behaviour).
=item get_address :CodeRef
A callback function to override the default address resolution logic. Takes three arguments: ($hostname, $port, $timeout_in_seconds) and returns: ($sockaddr, $errReason). If the returned $sockaddr is undef, then the resolution is considered as a fai...
=item inet_aton :CodeRef
Deprecated. New applications should use B<get_address> instead.
A callback function to customize name resolution. Takes two arguments: ($hostname, $timeout_in_seconds). If omitted, Furl calls L<Socket::inet_aton>.
=item ssl_opts :HashRef
SSL configuration used on https requests, passed directly to C<< IO::Socket::SSL->new() >>,
for example:
use IO::Socket::SSL;
my $ua = Furl::HTTP->new(
ssl_opts => {
SSL_verify_mode => SSL_VERIFY_PEER(),
},
});
See L<IO::Socket::SSL> for details.
=back
=head2 Instance Methods
=head3 C<< $furl->request(%args) :($protocol_minor_version, $code, $msg, \@headers, $body) >>
Sends an HTTP request to a specified URL and returns a protocol minor version,
status code, status message, response headers, response body respectively.
I<%args> might be:
=over
=item scheme :Str = "http"
Protocol scheme. May be C<http> or C<https>.
=item host :Str
Server host to connect.
You must specify at least C<host> or C<url>.
=item port :Int = 80
Server port to connect. The default is 80 on C<< scheme => 'http' >>,
or 443 on C<< scheme => 'https' >>.
=item path_query :Str = "/"
Path and query to request.
=item url :Str
URL to request.
You can use C<url> instead of C<scheme>, C<host>, C<port> and C<path_query>.
=item headers :ArrayRef
HTTP request headers. e.g. C<< headers => [ 'Accept-Encoding' => 'gzip' ] >>.
=item content : Str | ArrayRef[Str] | HashRef[Str] | FileHandle
Content to request.
=item write_file : FileHandle
If this parameter is set, the response content will be saved here instead of in the response object.
It's like a C<:content_file> in L<LWP::UserAgent>.
=item write_code : CodeRef
If a callback is provided with the "write_code" option
then this function will be called for each chunk of the response
content as it is received from the server.
It's like a C<:content_cb> in L<LWP::UserAgent>.
=back
The C<request()> method assumes the first argument to be an instance
of C<HTTP::Request> if the arguments are an odd number:
my $req = HTTP::Request->new(...);
my @res = $furl->request($req); # allowed
You must encode all the queries or this method will die, saying
C<Wide character in ...>.
=head3 C<< $furl->get($url :Str, $headers :ArrayRef[Str] ) >>
This is an easy-to-use alias to C<request()>, sending the C<GET> method.
=head3 C<< $furl->head($url :Str, $headers :ArrayRef[Str] ) >>
This is an easy-to-use alias to C<request()>, sending the C<HEAD> method.
=head3 C<< $furl->post($url :Str, $headers :ArrayRef[Str], $content :Any) >>
This is an easy-to-use alias to C<request()>, sending the C<POST> method.
=head3 C<< $furl->put($url :Str, $headers :ArrayRef[Str], $content :Any) >>
This is an easy-to-use alias to C<request()>, sending the C<PUT> method.
=head3 C<< $furl->delete($url :Str, $headers :ArrayRef[Str] ) >>
This is an easy-to-use alias to C<request()>, sending the C<DELETE> method.
=head1 FAQ
=over 4
=item Why IO::Socket::SSL?
Net::SSL is not well documented.
=item Why is env_proxy optional?
Environment variables are highly dependent on each users' environment,
and we think it may confuse users when something doesn't go right.
=item What operating systems are supported?
Linux 2.6 or higher, OSX Tiger or higher, Windows XP or higher.
And other operating systems will be supported if you send a patch.
=item Why doesn't Furl support chunked upload?
There are reasons why chunked POST/PUTs should not be used in general.
First, you cannot send chunked requests unless the peer server at the other end of the established TCP connection is known to be a HTTP/1.1 server.
Second, HTTP/1.1 servers disconnect their persistent connection quite quickly (compared to the time they wait for the first request), so it is not a good idea to post non-idempotent requests (e.g. POST, PUT, etc.) as a succeeding request over persist...
These facts together makes using chunked requests virtually impossible (unless you _know_ that the server supports HTTP/1.1), and this is why we decided that supporting the feature is NOT of high priority.
=item How do you build the response content as it arrives?
You can use L<IO::Callback> for this purpose.
my $fh = IO::Callback->new(
'<',
sub {
my $x = shift @data;
$x ? "-$x" : undef;
}
);
my ( $code, $msg, $headers, $content ) =
$furl->put( "http://127.0.0.1:$port/", [ 'Content-Length' => $len ], $fh,
);
=item How do you use gzip/deflate compressed communication?
Add an B<Accept-Encoding> header to your request. Furl inflates response bodies transparently according to the B<Content-Encoding> response header.
=item How do you use multipart/form-data?
You can use multipart/form-data with L<HTTP::Request::Common>.
use HTTP::Request::Common;
my $furl = Furl->new();
$req = POST 'http://www.perl.org/survey.cgi',
Content_Type => 'form-data',
Content => [
name => 'Hiromu Tokunaga',
email => 'tokuhirom@example.com',
gender => 'F',
born => '1978',
init => ["$ENV{HOME}/.profile"],
];
$furl->request($req);
lib/Furl/HTTP.pm view on Meta::CPAN
RFC 2616 section 9.4 says:
The HEAD method is identical to GET except that the server MUST NOT
return a message-body in the response.
Some web applications, however, returns message bodies on the HEAD method,
which might confuse C<Keep-Alive> processes, so Furl closes connection in
such cases.
Anyway, the HEAD method is not so useful nowadays. The GET method and
C<If-Modified-Since> are more suitable to cache HTTP contents.
=item Why does Furl take longer than specified until it returns a timeout error?
Although Furl itself supports timeout, some underlying modules / functions do not. And the most noticeable one is L<Socket::inet_aton>, the function used for name resolution (a function that converts host names to IP addresses). If you need accurate ...
use Net::DNS::Lite qw();
my $furl = Furl->new(
timeout => $my_timeout_in_seconds,
inet_aton => sub { Net::DNS::Lite::inet_aton(@_) },
);
=item How can I replace Host header instead of hostname?
Furl::HTTP does not provide a way to replace the Host header because such a design leads to security issues.
If you want to send HTTP requests to a dedicated server (or a UNIX socket), you should use the B<get_address> callback to designate the peer to which L<Furl> should connect as B<sockaddr>.
The example below sends all requests to 127.0.0.1:8080.
my $ua = Furl::HTTP->new(
get_address => sub {
my ($host, $port, $timeout) = @_;
pack_sockaddr_in(8080, inet_aton("127.0.0.1"));
},
);
my ($minor_version, $code, $msg, $headers, $body) = $furl->request(
url => 'http://example.com/foo',
method => 'GET'
);
=back
=head1 TODO
- AnyEvent::Furl?
- ipv6 support
- better docs for NO_PROXY
=head1 OPTIONAL FEATURES
=head2 Internationalized Domain Name (IDN)
This feature requires Net::IDN::Encode.
=head2 SSL
This feature requires IO::Socket::SSL.
=head2 Content-Encoding (deflate, gzip)
This feature requires Compress::Raw::Zlib.
=head1 DEVELOPMENT
To setup your environment:
$ git clone http://github.com/tokuhirom/Furl.git
$ cd Furl
To get picohttpparser:
$ git submodule init
$ git submodule update
$ perl Makefile.PL
$ make
$ sudo make install
=head2 HOW TO CONTRIBUTE
Please send the pull request via L<http://github.com/tokuhirom/Furl/>.
=head1 SEE ALSO
L<LWP>
HTTP specs:
L<http://www.w3.org/Protocols/HTTP/1.0/spec.html>
L<http://www.w3.org/Protocols/HTTP/1.1/spec.html>
=head1 LICENSE
Copyright (C) Tokuhiro Matsuno.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
( run in 1.126 second using v1.01-cache-2.11-cpan-39bf76dae61 )