File-HTTP
view release on metacpan or search on metacpan
lib/File/HTTP.pm view on Meta::CPAN
# open a filehanlde to an HTTP URL and read it as if it was a seekable file
package File::HTTP;
use strict;
use warnings;
use Carp;
use Symbol ();
use Socket ();
use Errno ();
use Fcntl ();
use Exporter;
use bytes ();
use Time::HiRes qw(time);
use constant 1.03; # hash ref, perl 5.7.2
# on demand modules:
# - Time::y2038 or Time::Local
# - IO::Socket::SSL
our $VERSION = '1.11';
our @EXPORT_OK = qw(
open stat open_at open_stream slurp_stream get post
opendir readdir rewinddir telldir seekdir closedir
opendir_slash
_e _s
);
our %EXPORT_TAGS = (
all => \@EXPORT_OK,
open => [qw(open stat _s _e)],
opendir => [qw(opendir readdir rewinddir telldir seekdir closedir)],
);
sub import {
if (grep {$_ eq '-everywhere'} @_) {
@_ = grep {$_ ne '-everywhere'} @_;
eval join(';', map {"*CORE::GLOBAL::$_ = \\&File::HTTP::$_"} qw(open stat opendir readdir rewinddir telldir seekdir closedir));
}
goto \&Exporter::import;
}
use constant DEBUG => 0;
# define instance variables
use constant FIELDS => qw(
URL
PROTO
HOST
REMOTE_HOST
OFFSET
CURRENT_OFFSET
CONTENT_LENGTH
PORT
PATH
REAL_PATH
IP
NETLOC
CONNECT_NETLOC
MTIME
LAST_MODIFIED
CONTENT_TYPE
HTTP_VERSION
FH
FH_STAT
LAST_READ
AUTH
LAST_HEADERS_SIZE
SSL
REQUEST_TIME
RESPONSE_TIME
NO_CLOSE_ON_DESTROY
DIR_LIST
DIR_POS
);
lib/File/HTTP.pm view on Meta::CPAN
elsif ($self->[OFFSET] > $self->[CURRENT_OFFSET] && $self->[OFFSET]-$self->[CURRENT_OFFSET] < $MAX_LENGTH_SKIP+$self->[LAST_HEADERS_SIZE]) {
DEBUG && warn "skip\n";
my $to_skip = $self->[OFFSET]-$self->[CURRENT_OFFSET];
$self->_read(my $buf, $to_skip)==$to_skip or return;
$self->[CURRENT_OFFSET] = $self->[OFFSET];
$self->[LAST_READ] = time;
return 1;
}
DEBUG && print STDERR "[close]";
}
elsif (DEBUG) {
warn "not connected";
}
$REQUEST_HEADERS = ($REQUEST_HEADERS && ref $REQUEST_HEADERS) ? $$REQUEST_HEADERS : do {
my $http_version = defined($self->[OFFSET]) ? '1.1' : '1.0';
my @h = (
"GET $self->[PATH] HTTP/$http_version",
"Host: $self->[NETLOC]",
"User-Agent: $USER_AGENT",
"Connection: close",
);
# push @h, "Proxy-Connection: close" if $self->[CONNECT_NETLOC] && $self->[PROTO] ne 'HTTPS';
push @h, "Range: bytes=$self->[OFFSET]-" if defined $self->[OFFSET];
push @h, "Authorization: Basic ". MIME::Base64::encode_base64($self->[AUTH]) if $self->[AUTH];
join("\015\012", @h, '', '')
};
die "error: ".&Errno::EFAULT unless $self->[IP]; # Bad address
if ($self->[FH]) {
# shutdown($self->[FH], 2);
CORE::close($self->[FH]);
# select(undef, undef, undef, 0.1);
}
$self->[FH] = undef;
$self->[REQUEST_TIME] = time;
($self->[HTTP_VERSION]) = $REQUEST_HEADERS =~m! HTTP/(\d+\.\d+)\r?\n!;
$self->[HTTP_VERSION] += 0;
$self->[LAST_HEADERS_SIZE] = 0;
socket($self->[FH], AF_INET, SOCK_STREAM, IPPROTO_TCP) || die $!;
# setsockopt($self->[FH], SOL_SOCKET, SO_LINGER, DONT_LINGER) || die $!;
select((select($self->[FH]), $|=1)[0]); # autoflush
for (1..10) {
my $t = $DEBUG_SLOW_CONNECTION && time;
my $status = connect($self->[FH], Socket::sockaddr_in($self->[PORT], $self->[IP]));
if ($DEBUG_SLOW_CONNECTION && time-$t >= .4) {
warn sprintf "\nSLOW %s CONNECTION to %s:%d: %s", ($status ? 'SUCCESS' : 'FAILED'), $self->[HOST], $self->[PORT], time-$t;
}
last if $status;
die $! unless $_ < 3 && $! =~ /Interrupted system call/i;
}
$self->[FH_STAT] ||= [ CORE::stat($self->[FH]) ];
if ($self->[PROTO] eq 'HTTPS') {
$self->[SSL] = 1;
unless ($SSL_LOADED) {
eval {require IO::Socket::SSL;1} || croak "HTTPS support requires IO::Socket::SSL: $@";
$SSL_LOADED = 1;
}
if ($self->[CONNECT_NETLOC]) {
my ($code, $headers) = $self->_handshake(
join("\015\012",
"CONNECT $self->[CONNECT_NETLOC] HTTP/1.0",
"User-Agent: ". ($TUNNELING_USER_AGENT||$USER_AGENT),
'',
''
)
);
die "error: HTTP error $code from proxy during CONNECT\n" unless $code == 200;
}
IO::Socket::SSL->start_SSL($self->[FH],
SSL_verifycn_name => $self->[REMOTE_HOST],
SSL_hostname => $self->[REMOTE_HOST],
SSL_session_cache_size => 100,
SSL_verify_mode => &IO::Socket::SSL::SSL_VERIFY_NONE,
);
}
(my $code, $RESPONSE_HEADERS) = $self->_handshake($REQUEST_HEADERS);
$self->[RESPONSE_TIME] = time;
my $code_ok = do {
if (defined $self->[OFFSET]) {
$code == 206
} else {
$code == 200 || $code == 204
}
};
if (!$code_ok) {
if ($code =~ /^3/ && $RESPONSE_HEADERS =~ /\015?\012Location: ([^\015\012]+)/i) {
die "redirection: $1\n" unless $IGNORE_REDIRECTIONS;
}
elsif (!$IGNORE_ERRORS) {
$self->[CONTENT_LENGTH] ||= ($RESPONSE_HEADERS =~ /\015?\012Content-Length: (\d+)/i && $1);
if ($code =~ /^200$|^416$/ && $self->[OFFSET] >= $self->[CONTENT_LENGTH]) {
DEBUG && warn "out of range\n";
CORE::open($self->[FH] = undef, '<', '/dev/null') || CORE::open($self->[FH] = undef, '<', 'nul');
} else {
$! = $HTTP2FS_error{$code} || &Errno::ENOSYS; # ENOSYS: Function not implemented
$VERBOSE && $code==200 && carp "Server does not support range queries. Consider using open_stream() instead of open()";
die "error: ", 0+$!, "\n";
}
}
}
if ($RESPONSE_HEADERS =~ m!\015?\012Transfert-Encoding: +chunked!i && $self->[HTTP_VERSION] <= 1) {
$! = $HTTP2FS_error{$code} || &Errno::ENOSYS; # ENOSYS: Function not implemented
die "error: ", 0+$!, "\n";
}
unless (defined $self->[CONTENT_LENGTH]) {
($self->[CONTENT_LENGTH]) = $RESPONSE_HEADERS =~ m!\015?\012Content-Range: +bytes +\d*-\d*/(\d+)!i;
unless (defined $self->[CONTENT_LENGTH]) {
($self->[CONTENT_LENGTH]) = $RESPONSE_HEADERS =~ m!\015?\012Content-Length: (\d+)!i;
}
}
unless (defined $self->[CONTENT_TYPE]) {
($self->[CONTENT_TYPE]) = $RESPONSE_HEADERS =~ m!\015?\012Content-Type: +([^\015\012]+)!i;
}
unless (defined $self->[LAST_MODIFIED]) {
($self->[LAST_MODIFIED]) = $RESPONSE_HEADERS =~ m!\015?\012Last-Modified: +([^\015\012]+)!i;
}
return unless defined $self->[OFFSET];
$self->[LAST_READ] = $self->[RESPONSE_TIME];
$self->[CURRENT_OFFSET] = $self->[OFFSET];
return 1;
}
# read() reimplementation to overcome IO::Socket::SSL behavior of read() acting as sysread()
# <> is ok though
sub _read {
my ($self, undef, $len, $off) = @_;
if (not defined $off) {
$off = 0;
}
elsif ($off < 0) {
$off += bytes::length($_[1])
}
my $n = read($self->[FH], $_[1], $len, $off);
return $n unless $n;
if ($self->[SSL] && $len && $n < $len) {
# strange IO::Socket::SSL behavior: read() acts as sysread()
while ($n < $len) {
my $n_part = read($self->[FH], $_[1], $len-$n, $off+$n);
return $n unless $n_part;
$n += $n_part;
}
}
return $n;
}
sub TIEHANDLE {
my ($class, $url, $offset, $err_ref, $no_close_on_destroy) = @_;
my $self = bless [], $class;
my $redirections = 0;
$self->[NO_CLOSE_ON_DESTROY] = $no_close_on_destroy;
SET_URL: {
$self->[URL] = $url;
$self->[OFFSET] = $offset;
$self->[CURRENT_OFFSET] = $offset;
($self->[PROTO], $self->[AUTH], $self->[HOST], $self->[PORT], $self->[PATH]) = $url =~ m!^(https?)://(?:([^/:]+:[^/@]+)@)?([^/:]+)(?:\:(\d+))?(/[^#]+)?!i;
$self->[REMOTE_HOST] = $self->[HOST];
if ($self->[AUTH]) {
require MIME::Base64;
#$VERBOSE && carp "authentication in URI is not supported";
#$$err_ref = &Errno::EFAULT; # Bad address
#return undef;
}
$self->[PROTO] = uc($self->[PROTO]);
$self->[PORT] ||= $Proto2Port{$self->[PROTO]};
$self->[PATH] ||= '/';
$self->[NETLOC] = ($self->[PORT]==$Proto2Port{$self->[PROTO]}) ? $self->[HOST] : "$self->[HOST]:$self->[PORT]";
$self->[CONNECT_NETLOC] = '';
# PATH will change in case of proxy
$self->[REAL_PATH] = $self->[PATH];
# handle proxy
my $proxy = $self->[PROTO] eq 'HTTPS' ? $ENV{HTTPS_PROXY}||$ENV{HTTP_PROXY} : $ENV{HTTP_PROXY};
if ($proxy) {
my $no_proxy = join('|', map {s/^\*?\.//;$_} split(/[, ]+/, $ENV{NO_PROXY}||''));
unless (
($self->[HOST] eq '127.0.0.1')
||
($self->[HOST] eq 'localhost')
||
($no_proxy && $self->[HOST] =~ /$no_proxy$/i)
) {
# apply proxy
if ($proxy =~ m!^https://!) {
$VERBOSE && carp "proxies with HTTPS address are not supported";
$$err_ref = &Errno::EFAULT; # Bad address
return undef;
}
$self->[CONNECT_NETLOC] = "$self->[HOST]:$self->[PORT]";
($self->[HOST], $self->[PORT]) = $proxy =~ m!^(?:http://)?([^/:]+)(?:\:(\d+))?!i;
$self->[PORT] ||= $Proto2Port{$self->[PROTO]};
( run in 0.544 second using v1.01-cache-2.11-cpan-39bf76dae61 )