Furl

 view release on metacpan or  search on metacpan

lib/Furl/HTTP.pm  view on Meta::CPAN

package Furl::HTTP;
use strict;
use warnings;
use base qw/Exporter/;
use 5.008001;

our $VERSION = '3.15';

use Carp ();
use Furl::ConnectionCache;

use Scalar::Util ();
use Errno qw(EAGAIN ECONNRESET EINPROGRESS EINTR EWOULDBLOCK ECONNABORTED EISCONN);
use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK SEEK_SET SEEK_END);
use Socket qw(
    PF_INET SOCK_STREAM
    IPPROTO_TCP
    TCP_NODELAY
    pack_sockaddr_in
);
use Time::HiRes qw(time);

use constant WIN32 => $^O eq 'MSWin32';
use HTTP::Parser::XS qw/HEADERS_NONE HEADERS_AS_ARRAYREF HEADERS_AS_HASHREF/;

our @EXPORT_OK = qw/HEADERS_NONE HEADERS_AS_ARRAYREF HEADERS_AS_HASHREF/;


# ref. RFC 2616, 3.5 Content Codings:
#     For compatibility with previous implementations of HTTP,
#     applications SHOULD consider "x-gzip" and "x-compress" to be
#     equivalent to "gzip" and "compress" respectively.
# ("compress" is not supported, though)
my %COMPRESSED = map { $_ => undef } qw(gzip x-gzip deflate);

my $HTTP_TOKEN         = '[^\x00-\x31\x7F]+';
my $HTTP_QUOTED_STRING = q{"([^"]+|\\.)*"};

sub new {
    my $class = shift;
    my %args = @_ == 1 ? %{$_[0]} : @_;

    my @headers = (
        'User-Agent' => (delete($args{agent}) || __PACKAGE__ . '/' . $Furl::HTTP::VERSION),
    );
    my $connection_header = 'keep-alive';
    if(defined $args{headers}) {
        my $in_headers = delete $args{headers};
        for (my $i = 0; $i < @$in_headers; $i += 2) {
            my $name = $in_headers->[$i];
            if (lc($name) eq 'connection') {
                $connection_header = $in_headers->[$i + 1];
            } else {
                push @headers, $name, $in_headers->[$i + 1];
            }
        }
    }
    bless {
        timeout            => 10,
        max_redirects      => 7,
        bufsize            => 10*1024, # no mmap
        headers            => \@headers,
        connection_header  => $connection_header,
        proxy              => '',
        no_proxy           => '',
        connection_pool    => Furl::ConnectionCache->new(),
        header_format      => HEADERS_AS_ARRAYREF,
        stop_if            => sub {},
        inet_aton          => sub { Socket::inet_aton($_[0]) },
        ssl_opts           => {},
        capture_request    => $args{capture_request} || 0,
        inactivity_timeout => 600,
        %args
    }, $class;
}

sub get {
    my ( $self, $url, $headers ) = @_;
    $self->request(
        method  => 'GET',
        url     => $url,
        headers => $headers
    );
}

sub head {
    my ( $self, $url, $headers ) = @_;
    $self->request(
        method  => 'HEAD',
        url     => $url,
        headers => $headers
    );
}

sub post {
    my ( $self, $url, $headers, $content ) = @_;
    $self->request(
        method  => 'POST',
        url     => $url,
        headers => $headers,
        content => $content
    );
}

sub put {
    my ( $self, $url, $headers, $content ) = @_;

lib/Furl/HTTP.pm  view on Meta::CPAN

    local $ENV{HTTP_PROXY} if $ENV{REQUEST_METHOD};
    $self->{proxy} = $ENV{http_proxy} || $ENV{HTTP_PROXY} || $self->{proxy};
    $self->{no_proxy} = $ENV{NO_PROXY} || '';
    $self;
}

sub request {
    my $self = shift;
    my %args = @_;

    my $timeout_at = time + $self->{timeout};

    my ($scheme, $username, $password, $host, $port, $path_query);
    if (defined(my $url = $args{url})) {
        ($scheme, $username, $password, $host, $port, $path_query) = $self->_parse_url($url);
    }
    else {
        ($scheme, $host, $port, $path_query) = @args{qw/scheme host port path_query/};
        if (not defined $host) {
            Carp::croak("Missing host name in arguments");
        }
    }

    if (not defined $scheme) {
        $scheme = 'http';
    } elsif($scheme ne 'http' && $scheme ne 'https') {
        Carp::croak("Unsupported scheme: $scheme");
    }

    my $default_port = $scheme eq 'http'
        ? 80
        : 443;
    if(not defined $port) {
        $port = $default_port;
    }
    if(not defined $path_query) {
        $path_query = '/';
    }

    unless (substr($path_query, 0, 1) eq '/') {
        $path_query = "/$path_query"; # Compensate for slash (?foo=bar => /?foo=bar)
    }

    # Note. '_' is a invalid character for URI, but some servers using fucking underscore for domain name. Then, I accept the '_' character for domain name.
    if ($host =~ /[^A-Za-z0-9._-]/) {
        _requires('Net/IDN/Encode.pm',
            'Internationalized Domain Name (IDN)');
        $host = Net::IDN::Encode::domain_to_ascii($host);
    }

    my $proxy = $self->{proxy};
    my $no_proxy = $self->{no_proxy};
    if ($proxy && $no_proxy) {
        if ($self->match_no_proxy($no_proxy, $host)) {
            undef $proxy;
        }
    }

    local $SIG{PIPE} = 'IGNORE';
    my $sock         = $self->{connection_pool}->steal($host, $port);
    my $in_keepalive;
    if (defined $sock) {
        if ($self->_do_select(0, $sock, 0)) {
            close $sock;
            undef $sock;
        } else {
            $in_keepalive = 1;
        }
    }
    if(!$in_keepalive) {
        my $err_reason;
        if ($proxy) {
            my (undef, $proxy_user, $proxy_pass, $proxy_host, $proxy_port, undef)
                = $self->_parse_url($proxy);
            my $proxy_authorization;
            if (defined $proxy_user) {
                _requires('URI/Escape.pm',
                    'Basic auth');
                my($unescape_proxy_user) = URI::Escape::uri_unescape($proxy_user);
                my($unescape_proxy_pass) = URI::Escape::uri_unescape($proxy_pass);
                _requires('MIME/Base64.pm',
                    'Basic auth');
                $proxy_authorization = 'Basic ' . MIME::Base64::encode_base64("$unescape_proxy_user:$unescape_proxy_pass","");
            }
            if ($scheme eq 'http') {
                ($sock, $err_reason)
                    = $self->connect($proxy_host, $proxy_port, $timeout_at);
                if (defined $proxy_authorization) {
                    $self->{proxy_authorization} = $proxy_authorization;
                }
            } else {
                ($sock, $err_reason) = $self->connect_ssl_over_proxy(
                    $proxy_host, $proxy_port, $host, $port, $timeout_at, $proxy_authorization);
            }
        } else {
            if ($scheme eq 'http') {
                ($sock, $err_reason)
                    = $self->connect($host, $port, $timeout_at);
            } else {
                ($sock, $err_reason)
                    = $self->connect_ssl($host, $port, $timeout_at);
            }
        }
        return $self->_r500($err_reason)
            unless $sock;
    }

    # keep request dump
    my ($req_headers, $req_content) = ("", "");

    # write request
    my $method = $args{method} || 'GET';
    my $connection_header = $self->{connection_header};
    my $cookie_jar = $self->{cookie_jar};
    {
        my @headers = @{$self->{headers}};
        $connection_header = 'close'
            if $method eq 'HEAD';
        if (my $in_headers = $args{headers}) {
            for (my $i = 0; $i < @$in_headers; $i += 2) {
                my $name = $in_headers->[$i];
                if (lc($name) eq 'connection') {
                    $connection_header = $in_headers->[$i + 1];
                } else {
                    push @headers, $name, $in_headers->[$i + 1];
                }
            }
        }
        unshift @headers, 'Connection', $connection_header;
        if (exists $self->{proxy_authorization}) {

lib/Furl/HTTP.pm  view on Meta::CPAN

        $self->write_all($sock, $p, $timeout_at)
            or return $self->_r500(
                "Failed to send HTTP request: " . _strerror_or_timeout());

        if ($self->{capture_request}) {
            $req_headers = $p;
        }

        if (defined $content) {
            if ($content_is_fh) {
                my $ret;
                my $buf;
                SENDFILE: while (1) {
                    $ret = read($content, $buf, $self->{bufsize});
                    if (not defined $ret) {
                        Carp::croak("Failed to read request content: $!");
                    } elsif ($ret == 0) { # EOF
                        last SENDFILE;
                    }
                    $self->write_all($sock, $buf, $timeout_at)
                        or return $self->_r500(
                            "Failed to send content: " . _strerror_or_timeout()
                        );

                    if ($self->{capture_request}) {
                        $req_content .= $buf;
                    }
                }
            } else { # simple string
                if (length($content) > 0) {
                    $self->write_all($sock, $content, $timeout_at)
                        or return $self->_r500(
                            "Failed to send content: " . _strerror_or_timeout()
                        );

                    if ($self->{capture_request}) {
                        $req_content = $content;
                    }
                }
            }
        }
    }

    # read response
    my $buf = '';
    my $rest_header;
    my $res_minor_version;
    my $res_status;
    my $res_msg;
    my $res_headers;
    my $special_headers = $args{special_headers} || +{};
    $special_headers->{'connection'}        = '';
    $special_headers->{'content-length'}    = undef;
    $special_headers->{'location'}          = '';
    $special_headers->{'content-encoding'}  = '';
    $special_headers->{'transfer-encoding'} = '';
  LOOP: while (1) {
        my $n = $self->read_timeout($sock,
            \$buf, $self->{bufsize}, length($buf), $timeout_at);
        if(!$n) { # error or eof
            if ($in_keepalive && length($buf) == 0
                && (defined($n) || $!==ECONNRESET || (WIN32 && $! == ECONNABORTED))) {
                # the server closes the connection (maybe because of keep-alive timeout)
                return $self->request(%args);
            }
            return $self->_r500(
                !defined($n)
                    ? "Cannot read response header: " . _strerror_or_timeout()
                    : "Unexpected EOF while reading response header"
            );
        }
        else {
            my $ret;
            ( $ret, $res_minor_version, $res_status, $res_msg, $res_headers )
                =  HTTP::Parser::XS::parse_http_response( $buf,
                       $self->{header_format}, $special_headers );
            if ( $ret == -1 ) {
                return $self->_r500("Invalid HTTP response");
            }
            elsif ( $ret == -2 ) {
                # partial response
                next LOOP;
            }
            else {
                # succeeded
                $rest_header = substr( $buf, $ret );
                if ((int $res_status / 100) eq 1) { # Continue
                    # The origin server must not wait for the request body
                    # before sending the 100 (Continue) response.
                    # see http://greenbytes.de/tech/webdav/rfc2616.html#status.100
                    $buf = $rest_header;
                    next LOOP;
                }
                last LOOP;
            }
        }
    }

    my $max_redirects = 0;
    my $do_redirect = undef;
    if ($special_headers->{location}) {
        $max_redirects = defined($args{max_redirects}) ? $args{max_redirects} : $self->{max_redirects};
        $do_redirect = $max_redirects && $res_status =~ /^30[12378]$/;
    }

    my $res_content = '';
    unless ($do_redirect) {
        if (my $fh = $args{write_file}) {
            $res_content = Furl::FileStream->new( $fh );
        } elsif (my $coderef = $args{write_code}) {
            $res_content = Furl::CallbackStream->new(
                sub { $coderef->($res_status, $res_msg, $res_headers, @_) }
            );
        }
    }

    if (exists $COMPRESSED{ $special_headers->{'content-encoding'} }) {
        _requires('Furl/ZlibStream.pm', 'Content-Encoding', 'Compress::Raw::Zlib');

        $res_content = Furl::ZlibStream->new($res_content);
    }

    my $chunked        = ($special_headers->{'transfer-encoding'} eq 'chunked');
    my $content_length =  $special_headers->{'content-length'};
    if (defined($content_length) && $content_length !~ /\A[0-9]+\z/) {
        return $self->_r500("Bad Content-Length: ${content_length}");
    }

    unless ($method eq 'HEAD'
            || ($res_status < 200 && $res_status >= 100)
            || $res_status == 204
            || $res_status == 304) {
        my @err;
        if ( $chunked ) {
            @err = $self->_read_body_chunked($sock,
                \$res_content, $rest_header, $timeout_at);
        } else {
            $res_content .= $rest_header;
            if (ref $res_content || !defined($content_length)) {
                @err = $self->_read_body_normal($sock,
                    \$res_content, length($rest_header),
                    $content_length, $timeout_at);
            } else {
                @err = $self->_read_body_normal_to_string_buffer($sock,
                    \$res_content, length($rest_header),
                    $content_length, $timeout_at);
            }
        }
        if(@err) {
            return @err;
        }
    }

    # manage connection cache (i.e. keep-alive)
    if (lc($connection_header) eq 'keep-alive') {
        my $connection = lc $special_headers->{'connection'};
        if (($res_minor_version == 0
             ? $connection eq 'keep-alive' # HTTP/1.0 needs explicit keep-alive
             : $connection ne 'close')    # HTTP/1.1 can keep alive by default
            && ( defined $content_length or $chunked)) {
            $self->{connection_pool}->push($host, $port, $sock);
        }
    }
    # explicitly close here, just after returning the socket to the pool,
    # since it might be reused in the upcoming recursive call
    undef $sock;

    # process 'Set-Cookie' header.
    if (defined $cookie_jar) {
        my $req_url = join(
            '',
            $scheme,
            '://',
            (defined($username) && defined($password) ? "${username}:${password}@" : ()),
            "$host:${port}${path_query}",
        );
        my $cookies = $res_headers->{'set-cookie'};
        $cookies = [$cookies] if !ref$cookies;
        for my $cookie (@$cookies) {
            $cookie_jar->add($req_url, $cookie);
        }
    }

    if ($do_redirect) {
        my $location = $special_headers->{location};
        unless ($location =~ m{^[a-z0-9]+://}) {
            # RFC 2616 14.30 says Location header is absolute URI.
            # But, a lot of servers return relative URI.
            _requires("URI.pm", "redirect with relative url");
            $location = URI->new_abs($location, "$scheme://$host:$port$path_query")->as_string;
        }
        # Note: RFC 1945 and RFC 2068 specify that the client is not allowed
        # to change the method on the redirected request.  However, most
        # existing user agent implementations treat 302 as if it were a 303
        # response, performing a GET on the Location field-value regardless
        # of the original request method. The status codes 303 and 307 have
        # been added for servers that wish to make unambiguously clear which
        # kind of reaction is expected of the client. Also, 308 was introduced
        # to avoid the ambiguity of 301.
        return $self->request(
            @_,
            method        => $res_status =~ /^30[178]$/ ? $method : 'GET',
            url           => $location,
            max_redirects => $max_redirects - 1,
        );
    }

    # return response.

    if (ref $res_content) {
        $res_content = $res_content->get_response_string;
    }

    return (
        $res_minor_version, $res_status, $res_msg, $res_headers, $res_content,
        $req_headers, $req_content, undef, undef, [$scheme, $username, $password, $host, $port, $path_query],
    );
}

lib/Furl/HTTP.pm  view on Meta::CPAN

        host       => 'example.com',
        port       => 80,
        path_query => '/'
    );
    # or

    # Accept-Encoding is supported but optional
    $furl = Furl->new(
        headers => [ 'Accept-Encoding' => 'gzip' ],
    );
    my $body = $furl->get('http://example.com/some/compressed');

=head1 DESCRIPTION

Furl is yet another HTTP client library. LWP is the de facto standard HTTP
client for Perl 5, but it is too slow for some critical jobs, and too complex
for weekend hacking. Furl resolves these issues. Enjoy it!

=head1 INTERFACE

=head2 Class Methods

=head3 C<< Furl::HTTP->new(%args | \%args) :Furl >>

Creates and returns a new Furl client with I<%args>. Dies on errors.

I<%args> might be:

=over

=item agent :Str = "Furl/$VERSION"

=item timeout :Int = 10

Seconds until the call to $furl->request returns a timeout error (as an internally generated 500 error). The timeout might not be accurate since some underlying modules / built-ins function may block longer than the specified timeout. See the L</"FAQ...

=item inactivity_timeout :Int = 600

An inactivity timer for TCP read/write (in seconds). $furl->request returns a timeout error if no additional data arrives (or is sent) within the specified threshold.

=item max_redirects :Int = 7

=item proxy :Str

=item no_proxy :Str

=item headers :ArrayRef

=item header_format :Int = HEADERS_AS_ARRAYREF

This option choose return value format of C<< $furl->request >>.

This option allows HEADERS_NONE or HEADERS_AS_ARRAYREF.

B<HEADERS_AS_ARRAYREF> is a default value. This makes B<$headers> as ArrayRef.

B<HEADERS_NONE> makes B<$headers> as undef. Furl does not return parsing result of headers. You should take needed headers from B<special_headers>.

=item connection_pool :Object

This is the connection pool object for keep-alive requests. By default, it is a instance of L<Furl::ConnectionCache>.

You may not customize this variable otherwise to use L<Coro>. This attribute requires a duck type object. It has two methods, C<< $obj->steal($host, $port >> and C<< $obj->push($host, $port, $sock) >>.

=item stop_if :CodeRef

A callback function that is called by Furl after when a blocking function call returns EINTR. Furl will abort the HTTP request and return immediately if the callback returns true. Otherwise the operation is continued (the default behaviour).

=item get_address :CodeRef

A callback function to override the default address resolution logic. Takes three arguments: ($hostname, $port, $timeout_in_seconds) and returns: ($sockaddr, $errReason).  If the returned $sockaddr is undef, then the resolution is considered as a fai...

=item inet_aton :CodeRef

Deprecated.  New applications should use B<get_address> instead.

A callback function to customize name resolution. Takes two arguments: ($hostname, $timeout_in_seconds). If omitted, Furl calls L<Socket::inet_aton>.

=item ssl_opts :HashRef

SSL configuration used on https requests, passed directly to C<< IO::Socket::SSL->new() >>,

for example:

    use IO::Socket::SSL;

    my $ua = Furl::HTTP->new(
        ssl_opts => {
            SSL_verify_mode => SSL_VERIFY_PEER(),
        },
    });

See L<IO::Socket::SSL> for details.

=back

=head2 Instance Methods

=head3 C<< $furl->request(%args) :($protocol_minor_version, $code, $msg, \@headers, $body) >>

Sends an HTTP request to a specified URL and returns a protocol minor version,
status code, status message, response headers, response body respectively.

I<%args> might be:

=over

=item scheme :Str = "http"

Protocol scheme. May be C<http> or C<https>.

=item host :Str

Server host to connect.

You must specify at least C<host> or C<url>.

=item port :Int = 80

Server port to connect. The default is 80 on C<< scheme => 'http' >>,
or 443 on C<< scheme => 'https' >>.



( run in 0.652 second using v1.01-cache-2.11-cpan-39bf76dae61 )