Furl
view release on metacpan or search on metacpan
lib/Furl/HTTP.pm view on Meta::CPAN
package Furl::HTTP;
use strict;
use warnings;
use base qw/Exporter/;
use 5.008001;
our $VERSION = '3.15';
use Carp ();
use Furl::ConnectionCache;
use Scalar::Util ();
use Errno qw(EAGAIN ECONNRESET EINPROGRESS EINTR EWOULDBLOCK ECONNABORTED EISCONN);
use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK SEEK_SET SEEK_END);
use Socket qw(
PF_INET SOCK_STREAM
IPPROTO_TCP
TCP_NODELAY
pack_sockaddr_in
);
use Time::HiRes qw(time);
use constant WIN32 => $^O eq 'MSWin32';
use HTTP::Parser::XS qw/HEADERS_NONE HEADERS_AS_ARRAYREF HEADERS_AS_HASHREF/;
our @EXPORT_OK = qw/HEADERS_NONE HEADERS_AS_ARRAYREF HEADERS_AS_HASHREF/;
# ref. RFC 2616, 3.5 Content Codings:
# For compatibility with previous implementations of HTTP,
# applications SHOULD consider "x-gzip" and "x-compress" to be
# equivalent to "gzip" and "compress" respectively.
# ("compress" is not supported, though)
my %COMPRESSED = map { $_ => undef } qw(gzip x-gzip deflate);
my $HTTP_TOKEN = '[^\x00-\x31\x7F]+';
my $HTTP_QUOTED_STRING = q{"([^"]+|\\.)*"};
sub new {
my $class = shift;
my %args = @_ == 1 ? %{$_[0]} : @_;
my @headers = (
'User-Agent' => (delete($args{agent}) || __PACKAGE__ . '/' . $Furl::HTTP::VERSION),
);
my $connection_header = 'keep-alive';
if(defined $args{headers}) {
my $in_headers = delete $args{headers};
for (my $i = 0; $i < @$in_headers; $i += 2) {
my $name = $in_headers->[$i];
if (lc($name) eq 'connection') {
$connection_header = $in_headers->[$i + 1];
} else {
push @headers, $name, $in_headers->[$i + 1];
}
}
}
bless {
timeout => 10,
max_redirects => 7,
bufsize => 10*1024, # no mmap
headers => \@headers,
connection_header => $connection_header,
proxy => '',
no_proxy => '',
connection_pool => Furl::ConnectionCache->new(),
header_format => HEADERS_AS_ARRAYREF,
stop_if => sub {},
inet_aton => sub { Socket::inet_aton($_[0]) },
ssl_opts => {},
capture_request => $args{capture_request} || 0,
inactivity_timeout => 600,
%args
}, $class;
}
sub get {
my ( $self, $url, $headers ) = @_;
$self->request(
method => 'GET',
url => $url,
headers => $headers
);
}
sub head {
my ( $self, $url, $headers ) = @_;
$self->request(
method => 'HEAD',
url => $url,
headers => $headers
);
}
sub post {
my ( $self, $url, $headers, $content ) = @_;
$self->request(
method => 'POST',
url => $url,
headers => $headers,
content => $content
);
}
sub put {
my ( $self, $url, $headers, $content ) = @_;
lib/Furl/HTTP.pm view on Meta::CPAN
local $ENV{HTTP_PROXY} if $ENV{REQUEST_METHOD};
$self->{proxy} = $ENV{http_proxy} || $ENV{HTTP_PROXY} || $self->{proxy};
$self->{no_proxy} = $ENV{NO_PROXY} || '';
$self;
}
sub request {
my $self = shift;
my %args = @_;
my $timeout_at = time + $self->{timeout};
my ($scheme, $username, $password, $host, $port, $path_query);
if (defined(my $url = $args{url})) {
($scheme, $username, $password, $host, $port, $path_query) = $self->_parse_url($url);
}
else {
($scheme, $host, $port, $path_query) = @args{qw/scheme host port path_query/};
if (not defined $host) {
Carp::croak("Missing host name in arguments");
}
}
if (not defined $scheme) {
$scheme = 'http';
} elsif($scheme ne 'http' && $scheme ne 'https') {
Carp::croak("Unsupported scheme: $scheme");
}
my $default_port = $scheme eq 'http'
? 80
: 443;
if(not defined $port) {
$port = $default_port;
}
if(not defined $path_query) {
$path_query = '/';
}
unless (substr($path_query, 0, 1) eq '/') {
$path_query = "/$path_query"; # Compensate for slash (?foo=bar => /?foo=bar)
}
# Note. '_' is a invalid character for URI, but some servers using fucking underscore for domain name. Then, I accept the '_' character for domain name.
if ($host =~ /[^A-Za-z0-9._-]/) {
_requires('Net/IDN/Encode.pm',
'Internationalized Domain Name (IDN)');
$host = Net::IDN::Encode::domain_to_ascii($host);
}
my $proxy = $self->{proxy};
my $no_proxy = $self->{no_proxy};
if ($proxy && $no_proxy) {
if ($self->match_no_proxy($no_proxy, $host)) {
undef $proxy;
}
}
local $SIG{PIPE} = 'IGNORE';
my $sock = $self->{connection_pool}->steal($host, $port);
my $in_keepalive;
if (defined $sock) {
if ($self->_do_select(0, $sock, 0)) {
close $sock;
undef $sock;
} else {
$in_keepalive = 1;
}
}
if(!$in_keepalive) {
my $err_reason;
if ($proxy) {
my (undef, $proxy_user, $proxy_pass, $proxy_host, $proxy_port, undef)
= $self->_parse_url($proxy);
my $proxy_authorization;
if (defined $proxy_user) {
_requires('URI/Escape.pm',
'Basic auth');
my($unescape_proxy_user) = URI::Escape::uri_unescape($proxy_user);
my($unescape_proxy_pass) = URI::Escape::uri_unescape($proxy_pass);
_requires('MIME/Base64.pm',
'Basic auth');
$proxy_authorization = 'Basic ' . MIME::Base64::encode_base64("$unescape_proxy_user:$unescape_proxy_pass","");
}
if ($scheme eq 'http') {
($sock, $err_reason)
= $self->connect($proxy_host, $proxy_port, $timeout_at);
if (defined $proxy_authorization) {
$self->{proxy_authorization} = $proxy_authorization;
}
} else {
($sock, $err_reason) = $self->connect_ssl_over_proxy(
$proxy_host, $proxy_port, $host, $port, $timeout_at, $proxy_authorization);
}
} else {
if ($scheme eq 'http') {
($sock, $err_reason)
= $self->connect($host, $port, $timeout_at);
} else {
($sock, $err_reason)
= $self->connect_ssl($host, $port, $timeout_at);
}
}
return $self->_r500($err_reason)
unless $sock;
}
# keep request dump
my ($req_headers, $req_content) = ("", "");
# write request
my $method = $args{method} || 'GET';
my $connection_header = $self->{connection_header};
my $cookie_jar = $self->{cookie_jar};
{
my @headers = @{$self->{headers}};
$connection_header = 'close'
if $method eq 'HEAD';
if (my $in_headers = $args{headers}) {
for (my $i = 0; $i < @$in_headers; $i += 2) {
my $name = $in_headers->[$i];
if (lc($name) eq 'connection') {
$connection_header = $in_headers->[$i + 1];
} else {
push @headers, $name, $in_headers->[$i + 1];
}
}
}
unshift @headers, 'Connection', $connection_header;
if (exists $self->{proxy_authorization}) {
lib/Furl/HTTP.pm view on Meta::CPAN
$self->write_all($sock, $p, $timeout_at)
or return $self->_r500(
"Failed to send HTTP request: " . _strerror_or_timeout());
if ($self->{capture_request}) {
$req_headers = $p;
}
if (defined $content) {
if ($content_is_fh) {
my $ret;
my $buf;
SENDFILE: while (1) {
$ret = read($content, $buf, $self->{bufsize});
if (not defined $ret) {
Carp::croak("Failed to read request content: $!");
} elsif ($ret == 0) { # EOF
last SENDFILE;
}
$self->write_all($sock, $buf, $timeout_at)
or return $self->_r500(
"Failed to send content: " . _strerror_or_timeout()
);
if ($self->{capture_request}) {
$req_content .= $buf;
}
}
} else { # simple string
if (length($content) > 0) {
$self->write_all($sock, $content, $timeout_at)
or return $self->_r500(
"Failed to send content: " . _strerror_or_timeout()
);
if ($self->{capture_request}) {
$req_content = $content;
}
}
}
}
}
# read response
my $buf = '';
my $rest_header;
my $res_minor_version;
my $res_status;
my $res_msg;
my $res_headers;
my $special_headers = $args{special_headers} || +{};
$special_headers->{'connection'} = '';
$special_headers->{'content-length'} = undef;
$special_headers->{'location'} = '';
$special_headers->{'content-encoding'} = '';
$special_headers->{'transfer-encoding'} = '';
LOOP: while (1) {
my $n = $self->read_timeout($sock,
\$buf, $self->{bufsize}, length($buf), $timeout_at);
if(!$n) { # error or eof
if ($in_keepalive && length($buf) == 0
&& (defined($n) || $!==ECONNRESET || (WIN32 && $! == ECONNABORTED))) {
# the server closes the connection (maybe because of keep-alive timeout)
return $self->request(%args);
}
return $self->_r500(
!defined($n)
? "Cannot read response header: " . _strerror_or_timeout()
: "Unexpected EOF while reading response header"
);
}
else {
my $ret;
( $ret, $res_minor_version, $res_status, $res_msg, $res_headers )
= HTTP::Parser::XS::parse_http_response( $buf,
$self->{header_format}, $special_headers );
if ( $ret == -1 ) {
return $self->_r500("Invalid HTTP response");
}
elsif ( $ret == -2 ) {
# partial response
next LOOP;
}
else {
# succeeded
$rest_header = substr( $buf, $ret );
if ((int $res_status / 100) eq 1) { # Continue
# The origin server must not wait for the request body
# before sending the 100 (Continue) response.
# see http://greenbytes.de/tech/webdav/rfc2616.html#status.100
$buf = $rest_header;
next LOOP;
}
last LOOP;
}
}
}
my $max_redirects = 0;
my $do_redirect = undef;
if ($special_headers->{location}) {
$max_redirects = defined($args{max_redirects}) ? $args{max_redirects} : $self->{max_redirects};
$do_redirect = $max_redirects && $res_status =~ /^30[12378]$/;
}
my $res_content = '';
unless ($do_redirect) {
if (my $fh = $args{write_file}) {
$res_content = Furl::FileStream->new( $fh );
} elsif (my $coderef = $args{write_code}) {
$res_content = Furl::CallbackStream->new(
sub { $coderef->($res_status, $res_msg, $res_headers, @_) }
);
}
}
if (exists $COMPRESSED{ $special_headers->{'content-encoding'} }) {
_requires('Furl/ZlibStream.pm', 'Content-Encoding', 'Compress::Raw::Zlib');
$res_content = Furl::ZlibStream->new($res_content);
}
my $chunked = ($special_headers->{'transfer-encoding'} eq 'chunked');
my $content_length = $special_headers->{'content-length'};
if (defined($content_length) && $content_length !~ /\A[0-9]+\z/) {
return $self->_r500("Bad Content-Length: ${content_length}");
}
unless ($method eq 'HEAD'
|| ($res_status < 200 && $res_status >= 100)
|| $res_status == 204
|| $res_status == 304) {
my @err;
if ( $chunked ) {
@err = $self->_read_body_chunked($sock,
\$res_content, $rest_header, $timeout_at);
} else {
$res_content .= $rest_header;
if (ref $res_content || !defined($content_length)) {
@err = $self->_read_body_normal($sock,
\$res_content, length($rest_header),
$content_length, $timeout_at);
} else {
@err = $self->_read_body_normal_to_string_buffer($sock,
\$res_content, length($rest_header),
$content_length, $timeout_at);
}
}
if(@err) {
return @err;
}
}
# manage connection cache (i.e. keep-alive)
if (lc($connection_header) eq 'keep-alive') {
my $connection = lc $special_headers->{'connection'};
if (($res_minor_version == 0
? $connection eq 'keep-alive' # HTTP/1.0 needs explicit keep-alive
: $connection ne 'close') # HTTP/1.1 can keep alive by default
&& ( defined $content_length or $chunked)) {
$self->{connection_pool}->push($host, $port, $sock);
}
}
# explicitly close here, just after returning the socket to the pool,
# since it might be reused in the upcoming recursive call
undef $sock;
# process 'Set-Cookie' header.
if (defined $cookie_jar) {
my $req_url = join(
'',
$scheme,
'://',
(defined($username) && defined($password) ? "${username}:${password}@" : ()),
"$host:${port}${path_query}",
);
my $cookies = $res_headers->{'set-cookie'};
$cookies = [$cookies] if !ref$cookies;
for my $cookie (@$cookies) {
$cookie_jar->add($req_url, $cookie);
}
}
if ($do_redirect) {
my $location = $special_headers->{location};
unless ($location =~ m{^[a-z0-9]+://}) {
# RFC 2616 14.30 says Location header is absolute URI.
# But, a lot of servers return relative URI.
_requires("URI.pm", "redirect with relative url");
$location = URI->new_abs($location, "$scheme://$host:$port$path_query")->as_string;
}
# Note: RFC 1945 and RFC 2068 specify that the client is not allowed
# to change the method on the redirected request. However, most
# existing user agent implementations treat 302 as if it were a 303
# response, performing a GET on the Location field-value regardless
# of the original request method. The status codes 303 and 307 have
# been added for servers that wish to make unambiguously clear which
# kind of reaction is expected of the client. Also, 308 was introduced
# to avoid the ambiguity of 301.
return $self->request(
@_,
method => $res_status =~ /^30[178]$/ ? $method : 'GET',
url => $location,
max_redirects => $max_redirects - 1,
);
}
# return response.
if (ref $res_content) {
$res_content = $res_content->get_response_string;
}
return (
$res_minor_version, $res_status, $res_msg, $res_headers, $res_content,
$req_headers, $req_content, undef, undef, [$scheme, $username, $password, $host, $port, $path_query],
);
}
lib/Furl/HTTP.pm view on Meta::CPAN
host => 'example.com',
port => 80,
path_query => '/'
);
# or
# Accept-Encoding is supported but optional
$furl = Furl->new(
headers => [ 'Accept-Encoding' => 'gzip' ],
);
my $body = $furl->get('http://example.com/some/compressed');
=head1 DESCRIPTION
Furl is yet another HTTP client library. LWP is the de facto standard HTTP
client for Perl 5, but it is too slow for some critical jobs, and too complex
for weekend hacking. Furl resolves these issues. Enjoy it!
=head1 INTERFACE
=head2 Class Methods
=head3 C<< Furl::HTTP->new(%args | \%args) :Furl >>
Creates and returns a new Furl client with I<%args>. Dies on errors.
I<%args> might be:
=over
=item agent :Str = "Furl/$VERSION"
=item timeout :Int = 10
Seconds until the call to $furl->request returns a timeout error (as an internally generated 500 error). The timeout might not be accurate since some underlying modules / built-ins function may block longer than the specified timeout. See the L</"FAQ...
=item inactivity_timeout :Int = 600
An inactivity timer for TCP read/write (in seconds). $furl->request returns a timeout error if no additional data arrives (or is sent) within the specified threshold.
=item max_redirects :Int = 7
=item proxy :Str
=item no_proxy :Str
=item headers :ArrayRef
=item header_format :Int = HEADERS_AS_ARRAYREF
This option choose return value format of C<< $furl->request >>.
This option allows HEADERS_NONE or HEADERS_AS_ARRAYREF.
B<HEADERS_AS_ARRAYREF> is a default value. This makes B<$headers> as ArrayRef.
B<HEADERS_NONE> makes B<$headers> as undef. Furl does not return parsing result of headers. You should take needed headers from B<special_headers>.
=item connection_pool :Object
This is the connection pool object for keep-alive requests. By default, it is a instance of L<Furl::ConnectionCache>.
You may not customize this variable otherwise to use L<Coro>. This attribute requires a duck type object. It has two methods, C<< $obj->steal($host, $port >> and C<< $obj->push($host, $port, $sock) >>.
=item stop_if :CodeRef
A callback function that is called by Furl after when a blocking function call returns EINTR. Furl will abort the HTTP request and return immediately if the callback returns true. Otherwise the operation is continued (the default behaviour).
=item get_address :CodeRef
A callback function to override the default address resolution logic. Takes three arguments: ($hostname, $port, $timeout_in_seconds) and returns: ($sockaddr, $errReason). If the returned $sockaddr is undef, then the resolution is considered as a fai...
=item inet_aton :CodeRef
Deprecated. New applications should use B<get_address> instead.
A callback function to customize name resolution. Takes two arguments: ($hostname, $timeout_in_seconds). If omitted, Furl calls L<Socket::inet_aton>.
=item ssl_opts :HashRef
SSL configuration used on https requests, passed directly to C<< IO::Socket::SSL->new() >>,
for example:
use IO::Socket::SSL;
my $ua = Furl::HTTP->new(
ssl_opts => {
SSL_verify_mode => SSL_VERIFY_PEER(),
},
});
See L<IO::Socket::SSL> for details.
=back
=head2 Instance Methods
=head3 C<< $furl->request(%args) :($protocol_minor_version, $code, $msg, \@headers, $body) >>
Sends an HTTP request to a specified URL and returns a protocol minor version,
status code, status message, response headers, response body respectively.
I<%args> might be:
=over
=item scheme :Str = "http"
Protocol scheme. May be C<http> or C<https>.
=item host :Str
Server host to connect.
You must specify at least C<host> or C<url>.
=item port :Int = 80
Server port to connect. The default is 80 on C<< scheme => 'http' >>,
or 443 on C<< scheme => 'https' >>.
( run in 0.652 second using v1.01-cache-2.11-cpan-39bf76dae61 )