Furl

 view release on metacpan or  search on metacpan

lib/Furl/HTTP.pm  view on Meta::CPAN

    # return response.

    if (ref $res_content) {
        $res_content = $res_content->get_response_string;
    }

    return (
        $res_minor_version, $res_status, $res_msg, $res_headers, $res_content,
        $req_headers, $req_content, undef, undef, [$scheme, $username, $password, $host, $port, $path_query],
    );
}

# connects to $host:$port and returns $socket
sub connect :method {
    my($self, $host, $port, $timeout_at) = @_;
    my $sock;

    my $timeout = $timeout_at - time;
    return (undef, "Failed to resolve host name: timeout")
        if $timeout <= 0;
    my ($sock_addr, $err_reason) = $self->_get_address($host, $port, $timeout);
    return (undef, "Cannot resolve host name: $host (port: $port), " . ($err_reason || $!))
        unless $sock_addr;

 RETRY:
    socket($sock, Socket::sockaddr_family($sock_addr), SOCK_STREAM, 0)
        or Carp::croak("Cannot create socket: $!");
    _set_sockopts($sock);
    if (connect($sock, $sock_addr)) {
        # connected
    } elsif ($! == EINPROGRESS || (WIN32 && $! == EWOULDBLOCK)) {
        $self->do_select(1, $sock, $timeout_at)
            or return (undef, "Cannot connect to ${host}:${port}: timeout");
        # connected
    } else {
        if ($! == EINTR && ! $self->{stop_if}->()) {
            close $sock;
            goto RETRY;
        }
        return (undef, "Cannot connect to ${host}:${port}: $!");
    }
    $sock;
}

sub _get_address {
    my ($self, $host, $port, $timeout) = @_;
    if ($self->{get_address}) {
        return $self->{get_address}->($host, $port, $timeout);
    }
    # default rule (TODO add support for IPv6)
    my $iaddr = $self->{inet_aton}->($host, $timeout)
        or return (undef, $!);
    pack_sockaddr_in($port, $iaddr);
}

sub _ssl_opts {
    my $self = shift;
    my $ssl_opts = $self->{ssl_opts};
    unless (exists $ssl_opts->{SSL_verify_mode}) {
        # set SSL_VERIFY_PEER as default.
        $ssl_opts->{SSL_verify_mode}     = IO::Socket::SSL::SSL_VERIFY_PEER();
        unless (exists $ssl_opts->{SSL_verifycn_scheme}) {
            $ssl_opts->{SSL_verifycn_scheme} = 'www'
        }
    }
    if ($ssl_opts->{SSL_verify_mode}) {
        unless (exists $ssl_opts->{SSL_ca_file} || exists $ssl_opts->{SSL_ca_path}) {
            require Mozilla::CA;
            $ssl_opts->{SSL_ca_file} = Mozilla::CA::SSL_ca_file();
        }
    }
    $ssl_opts;
}

# connect SSL socket.
# You can override this method in your child class, if you want to use Crypt::SSLeay or some other library.
# @return file handle like object
sub connect_ssl {
    my ($self, $host, $port, $timeout_at) = @_;
    _requires('IO/Socket/SSL.pm', 'SSL');

    my ($sock, $err_reason) = $self->connect($host, $port, $timeout_at);
    return (undef, $err_reason)
        unless $sock;

    my $timeout = $timeout_at - time;
    return (undef, "Cannot create SSL connection: timeout")
        if $timeout <= 0;

    my $ssl_opts = $self->_ssl_opts;
    IO::Socket::SSL->start_SSL(
        $sock,
        PeerHost => $host,
        PeerPort => $port,
        Timeout  => $timeout,
        %$ssl_opts,
    ) or return (undef, "Cannot create SSL connection: " . IO::Socket::SSL::errstr());
    _set_sockopts($sock);
    $sock;
}

sub connect_ssl_over_proxy {
    my ($self, $proxy_host, $proxy_port, $host, $port, $timeout_at, $proxy_authorization) = @_;
    _requires('IO/Socket/SSL.pm', 'SSL');

    my $sock = $self->connect($proxy_host, $proxy_port, $timeout_at);

    my $p = "CONNECT $host:$port HTTP/1.0\015\012Server: $host\015\012";
    if (defined $proxy_authorization) {
        $p .= "Proxy-Authorization: $proxy_authorization\015\012";
    }
    $p .= "\015\012";
    $self->write_all($sock, $p, $timeout_at)
        or return $self->_r500(
            "Failed to send HTTP request to proxy: " . _strerror_or_timeout());
    my $buf = '';
    my $read = $self->read_timeout($sock,
        \$buf, $self->{bufsize}, length($buf), $timeout_at);
    if (not defined $read) {
        return (undef, "Cannot read proxy response: " . _strerror_or_timeout());
    } elsif ( $read == 0 ) {    # eof
        return (undef, "Unexpected EOF while reading proxy response");
    } elsif ( $buf !~ /^HTTP\/1\.[0-9] 200 .+\015\012/ ) {
        return (undef, "Invalid HTTP Response via proxy");
    }

    my $timeout = $timeout_at - time;
    return (undef, "Cannot start SSL connection: timeout")
        if $timeout_at <= 0;

    my $ssl_opts = $self->_ssl_opts;
    unless (exists $ssl_opts->{SSL_verifycn_name}) {
        $ssl_opts->{SSL_verifycn_name} = $host;
    }
    IO::Socket::SSL->start_SSL(
        $sock,
        PeerHost => $host,
        PeerPort => $port,
        Timeout  => $timeout,
        %$ssl_opts
    ) or return (undef, "Cannot start SSL connection: " . IO::Socket::SSL::errstr());
    _set_sockopts($sock); # just in case (20101118 kazuho)
    $sock;
}

sub _read_body_chunked {
    my ($self, $sock, $res_content, $rest_header, $timeout_at) = @_;

    my $buf = $rest_header;
  READ_LOOP: while (1) {
        if (
            my ( $header, $next_len ) = (
                $buf =~
                  m{\A (                 # header
                        ( [0-9a-fA-F]+ ) # next_len (hex number)
                        (?:;
                            $HTTP_TOKEN
                            =
                            (?: $HTTP_TOKEN | $HTTP_QUOTED_STRING )
                        )*               # optional chunk-extensions
                        [ ]*             # www.yahoo.com adds spaces here.
                                         # Is this valid?
                        \015\012         # CR+LF
                  ) }xmso
            )
          )
        {
            $buf = substr($buf, length($header)); # remove header from buf
            $next_len = hex($next_len);
            if ($next_len == 0) {
                last READ_LOOP;
            }

            # +2 means trailing CRLF
          READ_CHUNK: while ( $next_len+2 > length($buf) ) {
                my $n = $self->read_timeout( $sock,
                    \$buf, $self->{bufsize}, length($buf), $timeout_at );
                if (!$n) {
                    return $self->_r500(
                        !defined($n)
                            ? "Cannot read chunk: " . _strerror_or_timeout()
                            : "Unexpected EOF while reading packets"
                    );
                }
            }
            $$res_content .= substr($buf, 0, $next_len);
            $buf = substr($buf, $next_len+2);
            if (length($buf) > 0) {
                next; # re-parse header
            }
        }

        my $n = $self->read_timeout( $sock,
            \$buf, $self->{bufsize}, length($buf), $timeout_at );
        if (!$n) {
            return $self->_r500(
                !defined($n)
                    ? "Cannot read chunk: " . _strerror_or_timeout()
                    : "Unexpected EOF while reading packets"
            );
        }

lib/Furl/HTTP.pm  view on Meta::CPAN

        # on EINTER/EAGAIN/EWOULDBLOCK
        $self->do_select(0, $sock, $timeout_at) or return undef;
    }
}

# returns (positive) number of bytes written, or undef if the socket is to be closed
sub write_timeout {
    my ($self, $sock, $buf, $len, $off, $timeout_at) = @_;
    my $ret;
    while(1) {
        # try to do the IO
        defined($ret = syswrite($sock, $buf, $len, $off))
            and return $ret;
        if ($! == EAGAIN || $! == EWOULDBLOCK || (WIN32 && $! == EISCONN)) {
            # passthru
        } elsif ($! == EINTR) {
            return undef if $self->{stop_if}->();
            # otherwise passthru
        } else {
            return undef;
        }
        $self->do_select(1, $sock, $timeout_at) or return undef;
    }
}

# writes all data in buf and returns number of bytes written or undef if failed
sub write_all {
    my ($self, $sock, $buf, $timeout_at) = @_;
    my $off = 0;
    while (my $len = length($buf) - $off) {
        my $ret = $self->write_timeout($sock, $buf, $len, $off, $timeout_at)
            or return undef;
        $off += $ret;
    }
    return $off;
}


sub _r500 {
    my($self, $message) = @_;
    $message = Carp::shortmess($message); # add lineno and filename
    return(0, 500, "Internal Response: $message",
        [
            'Content-Length' => length($message),
            'X-Internal-Response' => 1,
            # XXX ^^ EXPERIMENTAL header. Do not depend to this.
        ], $message
    );
}

sub _strerror_or_timeout {
    $! != 0 ? "$!" : 'timeout';
}

sub _set_sockopts {
    my $sock = shift;

    setsockopt( $sock, IPPROTO_TCP, TCP_NODELAY, 1 )
        or Carp::croak("Failed to setsockopt(TCP_NODELAY): $!");
    if (WIN32) {
        if (ref($sock) ne 'IO::Socket::SSL') {
            my $tmp = 1;
            ioctl( $sock, 0x8004667E, \$tmp )
                or Carp::croak("Cannot set flags for the socket: $!");
        }
    } else {
        my $flags = fcntl( $sock, F_GETFL, 0 )
            or Carp::croak("Cannot get flags for the socket: $!");
        $flags = fcntl( $sock, F_SETFL, $flags | O_NONBLOCK )
            or Carp::croak("Cannot set flags for the socket: $!");
    }

    {
        # no buffering
        my $orig = select();
        select($sock); $|=1;
        select($orig);
    }

    binmode $sock;
}

# You can override this method if you want to use more powerful matcher.
sub match_no_proxy {
    my ( $self, $no_proxy, $host ) = @_;

    # ref. curl.1.
    #   list of host names that shouldn't go through any proxy.
    #   If set to a asterisk '*' only, it matches all hosts.
    if ( $no_proxy eq '*' ) {
        return 1;
    }
    else {
        for my $pat ( split /\s*,\s*/, lc $no_proxy ) {
            if ( $host =~ /\Q$pat\E$/ ) { # suffix match(same behavior with LWP)
                return 1;
            }
        }
    }
    return 0;
}

# utility class
{
    package # hide from pause
        Furl::FileStream;
    use overload '.=' => 'append', fallback => 1;
    sub new {
        my ($class, $fh) = @_;
        bless {fh => $fh}, $class;
    }
    sub append {
        my($self, $partial) = @_;
        print {$self->{fh}} $partial;
        return $self;
    }
    sub get_response_string { undef }
}

{
    package # hide from pause

lib/Furl/HTTP.pm  view on Meta::CPAN

=head2 Class Methods

=head3 C<< Furl::HTTP->new(%args | \%args) :Furl >>

Creates and returns a new Furl client with I<%args>. Dies on errors.

I<%args> might be:

=over

=item agent :Str = "Furl/$VERSION"

=item timeout :Int = 10

Seconds until the call to $furl->request returns a timeout error (as an internally generated 500 error). The timeout might not be accurate since some underlying modules / built-ins function may block longer than the specified timeout. See the L</"FAQ...

=item inactivity_timeout :Int = 600

An inactivity timer for TCP read/write (in seconds). $furl->request returns a timeout error if no additional data arrives (or is sent) within the specified threshold.

=item max_redirects :Int = 7

=item proxy :Str

=item no_proxy :Str

=item headers :ArrayRef

=item header_format :Int = HEADERS_AS_ARRAYREF

This option choose return value format of C<< $furl->request >>.

This option allows HEADERS_NONE or HEADERS_AS_ARRAYREF.

B<HEADERS_AS_ARRAYREF> is a default value. This makes B<$headers> as ArrayRef.

B<HEADERS_NONE> makes B<$headers> as undef. Furl does not return parsing result of headers. You should take needed headers from B<special_headers>.

=item connection_pool :Object

This is the connection pool object for keep-alive requests. By default, it is a instance of L<Furl::ConnectionCache>.

You may not customize this variable otherwise to use L<Coro>. This attribute requires a duck type object. It has two methods, C<< $obj->steal($host, $port >> and C<< $obj->push($host, $port, $sock) >>.

=item stop_if :CodeRef

A callback function that is called by Furl after when a blocking function call returns EINTR. Furl will abort the HTTP request and return immediately if the callback returns true. Otherwise the operation is continued (the default behaviour).

=item get_address :CodeRef

A callback function to override the default address resolution logic. Takes three arguments: ($hostname, $port, $timeout_in_seconds) and returns: ($sockaddr, $errReason).  If the returned $sockaddr is undef, then the resolution is considered as a fai...

=item inet_aton :CodeRef

Deprecated.  New applications should use B<get_address> instead.

A callback function to customize name resolution. Takes two arguments: ($hostname, $timeout_in_seconds). If omitted, Furl calls L<Socket::inet_aton>.

=item ssl_opts :HashRef

SSL configuration used on https requests, passed directly to C<< IO::Socket::SSL->new() >>,

for example:

    use IO::Socket::SSL;

    my $ua = Furl::HTTP->new(
        ssl_opts => {
            SSL_verify_mode => SSL_VERIFY_PEER(),
        },
    });

See L<IO::Socket::SSL> for details.

=back

=head2 Instance Methods

=head3 C<< $furl->request(%args) :($protocol_minor_version, $code, $msg, \@headers, $body) >>

Sends an HTTP request to a specified URL and returns a protocol minor version,
status code, status message, response headers, response body respectively.

I<%args> might be:

=over

=item scheme :Str = "http"

Protocol scheme. May be C<http> or C<https>.

=item host :Str

Server host to connect.

You must specify at least C<host> or C<url>.

=item port :Int = 80

Server port to connect. The default is 80 on C<< scheme => 'http' >>,
or 443 on C<< scheme => 'https' >>.

=item path_query :Str = "/"

Path and query to request.

=item url :Str

URL to request.

You can use C<url> instead of C<scheme>, C<host>, C<port> and C<path_query>.

=item headers :ArrayRef

HTTP request headers. e.g. C<< headers => [ 'Accept-Encoding' => 'gzip' ] >>.

=item content : Str | ArrayRef[Str] | HashRef[Str] | FileHandle

Content to request.

=item write_file : FileHandle

If this parameter is set, the response content will be saved here instead of in the response object.

It's like a C<:content_file> in L<LWP::UserAgent>.

=item write_code : CodeRef

If a callback is provided with the "write_code" option
then this function will be called for each chunk of the response
content as it is received from the server.

It's like a C<:content_cb> in L<LWP::UserAgent>.

=back

The C<request()> method assumes the first argument to be an instance
of C<HTTP::Request> if the arguments are an odd number:

    my $req = HTTP::Request->new(...);
    my @res = $furl->request($req); # allowed

You must encode all the queries or this method will die, saying
C<Wide character in ...>.

=head3 C<< $furl->get($url :Str, $headers :ArrayRef[Str] ) >>

This is an easy-to-use alias to C<request()>, sending the C<GET> method.

=head3 C<< $furl->head($url :Str, $headers :ArrayRef[Str] ) >>

This is an easy-to-use alias to C<request()>, sending the C<HEAD> method.

=head3 C<< $furl->post($url :Str, $headers :ArrayRef[Str], $content :Any) >>

This is an easy-to-use alias to C<request()>, sending the C<POST> method.

=head3 C<< $furl->put($url :Str, $headers :ArrayRef[Str], $content :Any) >>

This is an easy-to-use alias to C<request()>, sending the C<PUT> method.

=head3 C<< $furl->delete($url :Str, $headers :ArrayRef[Str] ) >>

This is an easy-to-use alias to C<request()>, sending the C<DELETE> method.

=head1 FAQ

=over 4

=item Why IO::Socket::SSL?

Net::SSL is not well documented.

=item Why is env_proxy optional?

Environment variables are highly dependent on each users' environment,
and we think it may confuse users when something doesn't go right.

=item What operating systems are supported?

Linux 2.6 or higher, OSX Tiger or higher, Windows XP or higher.

And other operating systems will be supported if you send a patch.

=item Why doesn't Furl support chunked upload?

There are reasons why chunked POST/PUTs should not be used in general.

First, you cannot send chunked requests unless the peer server at the other end of the established TCP connection is known to be a HTTP/1.1 server.

Second, HTTP/1.1 servers disconnect their persistent connection quite quickly (compared to the time they wait for the first request), so it is not a good idea to post non-idempotent requests (e.g. POST, PUT, etc.) as a succeeding request over persist...

These facts together makes using chunked requests virtually impossible (unless you _know_ that the server supports HTTP/1.1), and this is why we decided that supporting the feature is NOT of high priority.

=item How do you build the response content as it arrives?

You can use L<IO::Callback> for this purpose.

    my $fh = IO::Callback->new(
        '<',
        sub {
            my $x = shift @data;
            $x ? "-$x" : undef;
        }
    );
    my ( $code, $msg, $headers, $content ) =
      $furl->put( "http://127.0.0.1:$port/", [ 'Content-Length' => $len ], $fh,
      );

=item How do you use gzip/deflate compressed communication?

Add an B<Accept-Encoding> header to your request. Furl inflates response bodies transparently according to the B<Content-Encoding> response header.

=item How do you use multipart/form-data?

You can use multipart/form-data with L<HTTP::Request::Common>.

    use HTTP::Request::Common;

    my $furl = Furl->new();
    $req = POST 'http://www.perl.org/survey.cgi',
      Content_Type => 'form-data',
      Content      => [
        name   => 'Hiromu Tokunaga',
        email  => 'tokuhirom@example.com',
        gender => 'F',
        born   => '1978',
        init   => ["$ENV{HOME}/.profile"],
      ];
    $furl->request($req);

lib/Furl/HTTP.pm  view on Meta::CPAN


RFC 2616 section 9.4 says:

    The HEAD method is identical to GET except that the server MUST NOT
    return a message-body in the response.

Some web applications, however, returns message bodies on the HEAD method,
which might confuse C<Keep-Alive> processes, so Furl closes connection in
such cases.

Anyway, the HEAD method is not so useful nowadays. The GET method and
C<If-Modified-Since> are more suitable to cache HTTP contents.

=item Why does Furl take longer than specified until it returns a timeout error?

Although Furl itself supports timeout, some underlying modules / functions do not. And the most noticeable one is L<Socket::inet_aton>, the function used for name resolution (a function that converts host names to IP addresses). If you need accurate ...

    use Net::DNS::Lite qw();

    my $furl = Furl->new(
        timeout   => $my_timeout_in_seconds,
        inet_aton => sub { Net::DNS::Lite::inet_aton(@_) },
    );

=item How can I replace Host header instead of hostname?

Furl::HTTP does not provide a way to replace the Host header because such a design leads to security issues.

If you want to send HTTP requests to a dedicated server (or a UNIX socket), you should use the B<get_address> callback to designate the peer to which L<Furl> should connect as B<sockaddr>.

The example below sends all requests to 127.0.0.1:8080.

    my $ua = Furl::HTTP->new(
        get_address => sub {
            my ($host, $port, $timeout) = @_;
            pack_sockaddr_in(8080, inet_aton("127.0.0.1"));
        },
    );

    my ($minor_version, $code, $msg, $headers, $body) = $furl->request(
        url => 'http://example.com/foo',
        method => 'GET'
    );

=back

=head1 TODO

    - AnyEvent::Furl?
    - ipv6 support
    - better docs for NO_PROXY

=head1 OPTIONAL FEATURES

=head2 Internationalized Domain Name (IDN)

This feature requires Net::IDN::Encode.

=head2 SSL

This feature requires IO::Socket::SSL.

=head2 Content-Encoding (deflate, gzip)

This feature requires Compress::Raw::Zlib.

=head1 DEVELOPMENT

To setup your environment:

    $ git clone http://github.com/tokuhirom/Furl.git
    $ cd Furl

To get picohttpparser:

    $ git submodule init
    $ git submodule update

    $ perl Makefile.PL
    $ make
    $ sudo make install

=head2 HOW TO CONTRIBUTE

Please send the pull request via L<http://github.com/tokuhirom/Furl/>.

=head1 SEE ALSO

L<LWP>

HTTP specs:
L<http://www.w3.org/Protocols/HTTP/1.0/spec.html>
L<http://www.w3.org/Protocols/HTTP/1.1/spec.html>

=head1 LICENSE

Copyright (C) Tokuhiro Matsuno.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut



( run in 1.126 second using v1.01-cache-2.11-cpan-39bf76dae61 )