File-HTTP

 view release on metacpan or  search on metacpan

lib/File/HTTP.pm  view on Meta::CPAN

# open a filehanlde to an HTTP URL and read it as if it was a seekable file
package File::HTTP;
use strict;
use warnings;
use Carp;
use Symbol ();
use Socket ();
use Errno ();
use Fcntl ();
use Exporter;
use bytes ();
use Time::HiRes qw(time);
use constant 1.03; # hash ref, perl 5.7.2

# on demand modules:
# - Time::y2038 or Time::Local
# - IO::Socket::SSL

our $VERSION = '1.11';

our @EXPORT_OK = qw(
	open stat open_at open_stream slurp_stream get post
	opendir readdir rewinddir telldir seekdir closedir 
	opendir_slash
	_e _s
);

our %EXPORT_TAGS = ( 
	all	=> \@EXPORT_OK,
	open	=> [qw(open stat _s _e)],
	opendir	=> [qw(opendir readdir rewinddir telldir seekdir closedir)],
);

sub import {
	if (grep {$_ eq '-everywhere'} @_) {
		@_ = grep {$_ ne '-everywhere'} @_;
		eval join(';', map {"*CORE::GLOBAL::$_ = \\&File::HTTP::$_"} qw(open stat opendir readdir rewinddir telldir seekdir closedir));
	}
	goto \&Exporter::import;
}

use constant DEBUG => 0;

# define instance variables
use constant FIELDS => qw(
	URL
	PROTO
	HOST
	REMOTE_HOST
	OFFSET
	CURRENT_OFFSET
	CONTENT_LENGTH
	PORT
	PATH
	REAL_PATH
	IP
	NETLOC
	CONNECT_NETLOC
	MTIME
	LAST_MODIFIED
	CONTENT_TYPE
	HTTP_VERSION
	FH
	FH_STAT
	LAST_READ
	AUTH
	LAST_HEADERS_SIZE
	SSL
	
	REQUEST_TIME
	RESPONSE_TIME

	NO_CLOSE_ON_DESTROY
	
	DIR_LIST
	DIR_POS
);

lib/File/HTTP.pm  view on Meta::CPAN

		elsif ($self->[OFFSET] > $self->[CURRENT_OFFSET] && $self->[OFFSET]-$self->[CURRENT_OFFSET] < $MAX_LENGTH_SKIP+$self->[LAST_HEADERS_SIZE]) {
			DEBUG && warn "skip\n";
			my $to_skip = $self->[OFFSET]-$self->[CURRENT_OFFSET];
			$self->_read(my $buf, $to_skip)==$to_skip or return;
			$self->[CURRENT_OFFSET] = $self->[OFFSET];
			$self->[LAST_READ] = time;
			return 1;
		}
		DEBUG && print STDERR "[close]";
	}
	elsif (DEBUG) {
		warn "not connected";
	}

	$REQUEST_HEADERS = ($REQUEST_HEADERS && ref $REQUEST_HEADERS) ? $$REQUEST_HEADERS : do {
		my $http_version = defined($self->[OFFSET]) ? '1.1' : '1.0';
		my @h = (
			"GET $self->[PATH] HTTP/$http_version",
			"Host: $self->[NETLOC]",
			"User-Agent: $USER_AGENT",
			"Connection: close",
		);
		# push @h, "Proxy-Connection: close" if $self->[CONNECT_NETLOC] && $self->[PROTO] ne 'HTTPS';
		push @h, "Range: bytes=$self->[OFFSET]-" if defined $self->[OFFSET];
		push @h, "Authorization: Basic ". MIME::Base64::encode_base64($self->[AUTH]) if $self->[AUTH];
	
	 	join("\015\012", @h, '', '')
	};

	die "error: ".&Errno::EFAULT unless $self->[IP]; # Bad address

	if ($self->[FH]) {
		# shutdown($self->[FH], 2);
		CORE::close($self->[FH]);
		# select(undef, undef, undef, 0.1);
	}
	$self->[FH] = undef;
	$self->[REQUEST_TIME] = time;
	($self->[HTTP_VERSION]) = $REQUEST_HEADERS =~m! HTTP/(\d+\.\d+)\r?\n!;
	$self->[HTTP_VERSION] += 0;
	$self->[LAST_HEADERS_SIZE] = 0;
	socket($self->[FH], AF_INET, SOCK_STREAM, IPPROTO_TCP) || die $!;
	# setsockopt($self->[FH], SOL_SOCKET, SO_LINGER, DONT_LINGER) || die $!;

	select((select($self->[FH]), $|=1)[0]); # autoflush
	for (1..10) {
		my $t = $DEBUG_SLOW_CONNECTION && time;
		my $status = connect($self->[FH], Socket::sockaddr_in($self->[PORT], $self->[IP]));
		if ($DEBUG_SLOW_CONNECTION && time-$t >= .4) {
			warn sprintf "\nSLOW %s CONNECTION to %s:%d: %s", ($status ? 'SUCCESS' : 'FAILED'), $self->[HOST], $self->[PORT], time-$t;
		}
		last if $status;
		die $! unless $_ < 3 && $! =~ /Interrupted system call/i;
	}
	
	$self->[FH_STAT] ||= [ CORE::stat($self->[FH]) ];

	if ($self->[PROTO] eq 'HTTPS') {
		$self->[SSL] = 1;
		unless ($SSL_LOADED) {
			eval {require IO::Socket::SSL;1} || croak "HTTPS support requires IO::Socket::SSL: $@";
			$SSL_LOADED = 1;
		}
		if ($self->[CONNECT_NETLOC]) {
			my ($code, $headers) = $self->_handshake(
				join("\015\012",
					"CONNECT $self->[CONNECT_NETLOC] HTTP/1.0",
					"User-Agent: ". ($TUNNELING_USER_AGENT||$USER_AGENT),
					'',
					''
				)
			);
			die "error: HTTP error $code from proxy during CONNECT\n" unless $code == 200;
		}

		IO::Socket::SSL->start_SSL($self->[FH],
			SSL_verifycn_name => $self->[REMOTE_HOST],
			SSL_hostname => $self->[REMOTE_HOST],
			SSL_session_cache_size => 100,
			SSL_verify_mode => &IO::Socket::SSL::SSL_VERIFY_NONE,
		);
	}

	(my $code, $RESPONSE_HEADERS) = $self->_handshake($REQUEST_HEADERS);

	$self->[RESPONSE_TIME] = time;

	my $code_ok = do {
		if (defined $self->[OFFSET]) {
			$code == 206
		} else {
			$code == 200 || $code == 204
		}
	};

	if (!$code_ok) {
		if ($code =~ /^3/ && $RESPONSE_HEADERS =~ /\015?\012Location: ([^\015\012]+)/i) {
			die "redirection: $1\n" unless $IGNORE_REDIRECTIONS;
		}
		elsif (!$IGNORE_ERRORS) {
			$self->[CONTENT_LENGTH] ||= ($RESPONSE_HEADERS =~ /\015?\012Content-Length: (\d+)/i && $1);
			if ($code =~ /^200$|^416$/ && $self->[OFFSET] >= $self->[CONTENT_LENGTH]) {
				DEBUG && warn "out of range\n";
				CORE::open($self->[FH] = undef, '<', '/dev/null') || CORE::open($self->[FH] = undef, '<', 'nul');
			} else {
				$! = $HTTP2FS_error{$code} || &Errno::ENOSYS; # ENOSYS: Function not implemented
				$VERBOSE && $code==200 && carp "Server does not support range queries. Consider using open_stream() instead of open()";
				die "error: ", 0+$!, "\n";
			}
		}
	}
	if ($RESPONSE_HEADERS =~ m!\015?\012Transfert-Encoding: +chunked!i && $self->[HTTP_VERSION] <= 1) {
		$! = $HTTP2FS_error{$code} || &Errno::ENOSYS; # ENOSYS: Function not implemented
		die "error: ", 0+$!, "\n";
	}
	
	unless (defined $self->[CONTENT_LENGTH]) {
		($self->[CONTENT_LENGTH]) = $RESPONSE_HEADERS =~ m!\015?\012Content-Range: +bytes +\d*-\d*/(\d+)!i;
		unless (defined $self->[CONTENT_LENGTH]) {
			($self->[CONTENT_LENGTH]) = $RESPONSE_HEADERS =~ m!\015?\012Content-Length: (\d+)!i;
		}
	}
	unless (defined $self->[CONTENT_TYPE]) {
		($self->[CONTENT_TYPE]) = $RESPONSE_HEADERS =~ m!\015?\012Content-Type: +([^\015\012]+)!i;
	}
	unless (defined $self->[LAST_MODIFIED]) {
		($self->[LAST_MODIFIED]) = $RESPONSE_HEADERS =~ m!\015?\012Last-Modified: +([^\015\012]+)!i;
	}
	
	return unless defined $self->[OFFSET];
	
	$self->[LAST_READ] = $self->[RESPONSE_TIME];
	$self->[CURRENT_OFFSET] = $self->[OFFSET];
	return 1;
}

# read() reimplementation to overcome IO::Socket::SSL behavior of read() acting as sysread()
# <> is ok though
sub _read {
	my ($self, undef, $len, $off) = @_;
	
	if (not defined $off)  {
		$off = 0;
	}
	elsif ($off < 0) {
		$off += bytes::length($_[1])
	}
	
	my $n = read($self->[FH], $_[1], $len, $off);
	return $n unless $n;
	
	if ($self->[SSL] && $len && $n < $len) {
		# strange IO::Socket::SSL behavior: read() acts as sysread()
		while ($n < $len) {
			my $n_part = read($self->[FH], $_[1], $len-$n, $off+$n);
			return $n unless $n_part;
			$n += $n_part;
		}
	}
	
	return $n;
}

sub TIEHANDLE {
	my ($class, $url, $offset, $err_ref, $no_close_on_destroy) = @_;
	my $self = bless [], $class;
	my $redirections = 0;

	$self->[NO_CLOSE_ON_DESTROY] = $no_close_on_destroy;

	SET_URL: {
		$self->[URL] = $url;
		$self->[OFFSET] = $offset;
		$self->[CURRENT_OFFSET] = $offset;
		($self->[PROTO], $self->[AUTH], $self->[HOST], $self->[PORT], $self->[PATH]) = $url =~ m!^(https?)://(?:([^/:]+:[^/@]+)@)?([^/:]+)(?:\:(\d+))?(/[^#]+)?!i;
		$self->[REMOTE_HOST] = $self->[HOST];

		if ($self->[AUTH]) {
			require MIME::Base64;
			#$VERBOSE && carp "authentication in URI is not supported";
			#$$err_ref = &Errno::EFAULT; # Bad address
			#return undef;
		}
		$self->[PROTO] = uc($self->[PROTO]);
		$self->[PORT] ||= $Proto2Port{$self->[PROTO]};
		$self->[PATH] ||= '/';
		$self->[NETLOC] = ($self->[PORT]==$Proto2Port{$self->[PROTO]}) ? $self->[HOST] : "$self->[HOST]:$self->[PORT]";
		$self->[CONNECT_NETLOC] = '';
		
		# PATH will change in case of proxy
		$self->[REAL_PATH] = $self->[PATH]; 
		
		# handle proxy
		my $proxy = $self->[PROTO] eq 'HTTPS' ? $ENV{HTTPS_PROXY}||$ENV{HTTP_PROXY} : $ENV{HTTP_PROXY};
		if ($proxy) {
			my $no_proxy = join('|', map {s/^\*?\.//;$_} split(/[, ]+/, $ENV{NO_PROXY}||''));
			
			unless (
				($self->[HOST] eq '127.0.0.1')
				||
				($self->[HOST] eq 'localhost')
				||
				($no_proxy && $self->[HOST] =~ /$no_proxy$/i)
			) {
				# apply proxy
				if ($proxy =~ m!^https://!) {
					$VERBOSE && carp "proxies with HTTPS address are not supported";
					$$err_ref = &Errno::EFAULT; # Bad address
					return undef;
				}
				$self->[CONNECT_NETLOC] = "$self->[HOST]:$self->[PORT]";
				($self->[HOST], $self->[PORT]) = $proxy =~ m!^(?:http://)?([^/:]+)(?:\:(\d+))?!i;
				$self->[PORT] ||= $Proto2Port{$self->[PROTO]};



( run in 0.544 second using v1.01-cache-2.11-cpan-39bf76dae61 )